diff --git a/include/swift/IRGen/IRGenPublic.h b/include/swift/IRGen/IRGenPublic.h
new file mode 100644
index 0000000000000..a6bb01ddbdea5
--- /dev/null
+++ b/include/swift/IRGen/IRGenPublic.h
@@ -0,0 +1,37 @@
+//===--- IRGenPublic.h - Public interface to IRGen --------------*- C++ -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+#ifndef SWIFT_IRGEN_IRGENPUBLIC_H
+#define SWIFT_IRGEN_IRGENPUBLIC_H
+
+namespace llvm {
+  class LLVMContext;
+}
+
+namespace swift {
+class SILModule;
+
+namespace irgen {
+
+class IRGenerator;
+class IRGenModule;
+
+/// Create an IRGen module.
+std::pair<IRGenerator *, IRGenModule *>
+createIRGenModule(SILModule *SILMod, llvm::LLVMContext &LLVMContext);
+
+/// Delete the IRGenModule and IRGenerator obtained by the above call.
+void deleteIRGenModule(std::pair<IRGenerator *, IRGenModule *> &Module);
+
+} // end namespace irgen
+} // end namespace swift
+
+#endif
diff --git a/include/swift/IRGen/IRGenSILPasses.h b/include/swift/IRGen/IRGenSILPasses.h
new file mode 100644
index 0000000000000..8f382825be19e
--- /dev/null
+++ b/include/swift/IRGen/IRGenSILPasses.h
@@ -0,0 +1,23 @@
+//===--- IRGenSILPasses.h - The IRGen Prepare SIL Passes ------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+namespace swift {
+
+class SILFunctionTransform;
+
+namespace irgen {
+
+/// Create a pass to hoist alloc_stack instructions with non-fixed size.
+SILFunctionTransform *createAllocStackHoisting();
+
+} // end namespace irgen
+} // end namespace swift
diff --git a/include/swift/SIL/SILFunction.h b/include/swift/SIL/SILFunction.h
index 7f3c4f1035ad5..01306d0273008 100644
--- a/include/swift/SIL/SILFunction.h
+++ b/include/swift/SIL/SILFunction.h
@@ -618,6 +618,9 @@ class SILFunction
   SILBasicBlock &front() { return *begin(); }
   const SILBasicBlock &front() const { return *begin(); }
 
+  SILBasicBlock *getEntryBlock() { return &front(); }
+  const SILBasicBlock *getEntryBlock() const { return &front(); }
+
   SILBasicBlock *createBasicBlock();
   SILBasicBlock *createBasicBlock(SILBasicBlock *After);
 
diff --git a/include/swift/SILOptimizer/PassManager/PassManager.h b/include/swift/SILOptimizer/PassManager/PassManager.h
index 32416f000e690..339a43d7e6666 100644
--- a/include/swift/SILOptimizer/PassManager/PassManager.h
+++ b/include/swift/SILOptimizer/PassManager/PassManager.h
@@ -32,11 +32,18 @@ class SILModuleTransform;
 class SILOptions;
 class SILTransform;
 
+namespace irgen {
+class IRGenModule;
+}
+
 /// \brief The SIL pass manager.
 class SILPassManager {
   /// The module that the pass manager will transform.
   SILModule *Mod;
 
+  /// An optional IRGenModule associated with this PassManager.
+  irgen::IRGenModule *IRMod;
+
   /// The list of transformations to run.
   llvm::SmallVector<SILTransform *, 16> Transformations;
 
@@ -90,11 +97,20 @@ class SILPassManager {
   /// same function.
   bool RestartPipeline = false;
 
+  /// The IRGen SIL passes. These have to be dynamically added by IRGen.
+  llvm::DenseMap<unsigned, SILFunctionTransform *> IRGenPasses;
+
 public:
   /// C'tor. It creates and registers all analysis passes, which are defined
   /// in Analysis.def.
   SILPassManager(SILModule *M, llvm::StringRef Stage = "");
 
+  /// C'tor. It creates an IRGen pass manager. Passes can query for the
+  /// IRGenModule.
+  SILPassManager(SILModule *M, irgen::IRGenModule *IRMod,
+                 llvm::StringRef Stage = "");
+
   const SILOptions &getOptions() const;
 
   /// \brief Searches for an analysis of type T in the list of registered
@@ -111,6 +127,10 @@ class SILPassManager {
   /// \returns the module that the pass manager owns.
   SILModule *getModule() { return Mod; }
 
+  /// \returns the associated IRGenModule or null if this is not an IRGen
+  /// pass manager.
+  irgen::IRGenModule *getIRGenModule() { return IRMod; }
+
   /// \brief Run one iteration of the optimization pipeline.
   void runOneIteration();
 
@@ -218,6 +238,15 @@ class SILPassManager {
     }
   }
 
+  void registerIRGenPass(PassKind Kind, SILFunctionTransform *Transform) {
+    assert(IRGenPasses.find(unsigned(Kind)) == IRGenPasses.end() &&
+           "Pass already registered");
+    assert(
+        IRMod &&
+        "Attempting to register an IRGen pass with a non-IRGen pass manager");
+    IRGenPasses[unsigned(Kind)] = Transform;
+  }
+
 private:
   void execute() {
     runOneIteration();
diff --git a/include/swift/SILOptimizer/PassManager/PassPipeline.def b/include/swift/SILOptimizer/PassManager/PassPipeline.def
index 8b0ddcd583e77..71a78e017c5b5 100644
--- a/include/swift/SILOptimizer/PassManager/PassPipeline.def
+++ b/include/swift/SILOptimizer/PassManager/PassPipeline.def
@@ -35,6 +35,7 @@ PASSPIPELINE(OwnershipEliminator, "Utility pass to just run the ownership elimin
 PASSPIPELINE_WITH_OPTIONS(Performance, "Passes run at -O")
 PASSPIPELINE(Onone, "Passes run at -Onone")
 PASSPIPELINE(InstCount, "Utility pipeline to just run the inst count pass")
+PASSPIPELINE(IRGenPrepare, "Pipeline to run during IRGen")
 
 #undef PASSPIPELINE_WITH_OPTIONS
 #undef PASSPIPELINE
diff --git a/include/swift/SILOptimizer/PassManager/Passes.def b/include/swift/SILOptimizer/PassManager/Passes.def
index 00f198aa9d78c..90550b1f3e461 100644
--- a/include/swift/SILOptimizer/PassManager/Passes.def
+++ b/include/swift/SILOptimizer/PassManager/Passes.def
@@ -23,6 +23,14 @@
 #error "Macro must be defined by includer"
 #endif
 
+/// IRGEN_PASS(Id, Name, Description)
+///   The pass is identified by PassKind::Id.
+///   An IRGen pass is created by IRGen and needs to be registered with the
+///   pass manager dynamically.
+#ifndef IRGEN_PASS
+#define IRGEN_PASS(Id, Name, Description) PASS(Id, Name, Description)
+#endif
+
 /// PASS_RANGE(RANGE_ID, START, END)
 ///   Pass IDs between PassKind::START and PassKind::END, inclusive,
 ///   fall within the set known as
@@ -36,6 +44,8 @@
 PASS(ABCOpt, "abcopts",
      "Optimization of array bounds checks")
 PASS(AllocBoxToStack, "allocbox-to-stack",
      "Promote heap allocations to stack allocations")
+IRGEN_PASS(AllocStackHoisting, "alloc-stack-hoisting",
+           "Hoist generic alloc_stack instructions to the entry block")
 PASS(ArrayCountPropagation, "array-count-propagation",
      "Propagate the count of arrays")
 PASS(ArrayElementPropagation, "array-element-propagation",
@@ -243,5 +253,6 @@
 PASS(BugReducerTester, "bug-reducer-tester",
      "Utility pass for testing sil-bug-reducer. Asserts when it visits an apply that calls a specific function")
 PASS_RANGE(AllPasses, AADumper, BugReducerTester)
 
+#undef IRGEN_PASS
 #undef PASS
 #undef PASS_RANGE
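An IRGEN_PASS entry deliberately gets no create##ID() declaration in Passes.h (see the next hunk); IRGen instantiates the pass itself and registers it with the pass manager at runtime. A minimal sketch of the intended registration, mirroring runIRGenPreparePasses in lib/IRGen/IRGen.cpp later in this patch:

    // Sketch: register every IRGEN_PASS with an IRGen-aware pass manager.
    SILPassManager PM(&SILMod, &IRMod);
    #define PASS(ID, Name, Description)
    #define IRGEN_PASS(ID, Name, Description) \
      PM.registerIRGenPass(swift::PassKind::ID, irgen::create##ID());
    #include "swift/SILOptimizer/PassManager/Passes.def"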
diff --git a/include/swift/SILOptimizer/PassManager/Passes.h b/include/swift/SILOptimizer/PassManager/Passes.h
index 1c2e580223e8c..1b2c166e2e9b3 100644
--- a/include/swift/SILOptimizer/PassManager/Passes.h
+++ b/include/swift/SILOptimizer/PassManager/Passes.h
@@ -23,6 +23,10 @@ namespace swift {
   class SILOptions;
   class SILTransform;
 
+  namespace irgen {
+    class IRGenModule;
+  }
+
   /// \brief Run all the SIL diagnostic passes on \p M.
   ///
   /// \returns true if the diagnostic passes produced an error
@@ -74,6 +78,7 @@ namespace swift {
   StringRef PassKindID(PassKind Kind);
 
 #define PASS(ID, NAME, DESCRIPTION) SILTransform *create##ID();
+#define IRGEN_PASS(ID, NAME, DESCRIPTION)
 #include "Passes.def"
 
 } // end namespace swift
diff --git a/include/swift/SILOptimizer/PassManager/Transforms.h b/include/swift/SILOptimizer/PassManager/Transforms.h
index 954a7fe1e28d3..3391b0f09b187 100644
--- a/include/swift/SILOptimizer/PassManager/Transforms.h
+++ b/include/swift/SILOptimizer/PassManager/Transforms.h
@@ -116,6 +116,12 @@ namespace swift {
   protected:
     SILFunction *getFunction() { return F; }
 
+    irgen::IRGenModule *getIRGenModule() {
+      auto *Mod = PM->getIRGenModule();
+      assert(Mod && "Expecting a valid module");
+      return Mod;
+    }
+
     void invalidateAnalysis(SILAnalysis::InvalidationKind K) {
       PM->invalidateAnalysis(F, K);
     }
diff --git a/lib/IRGen/Address.h b/lib/IRGen/Address.h
index 4c3c25ccb8cd7..a55e4e224d075 100644
--- a/lib/IRGen/Address.h
+++ b/lib/IRGen/Address.h
@@ -104,6 +104,34 @@ class ContainedAddress {
   bool isValid() const { return Addr.isValid(); }
 };
 
+/// An address on the stack together with an optional stack pointer reset
+/// location.
+class StackAddress {
+  /// The address of an object of type T.
+  Address Addr;
+  /// The stack pointer location to reset to when this stack object is
+  /// deallocated.
+  llvm::Value *StackPtrResetLocation;
+
+public:
+  StackAddress() : StackPtrResetLocation(nullptr) {}
+  StackAddress(Address address)
+      : Addr(address), StackPtrResetLocation(nullptr) {}
+  StackAddress(Address address, llvm::Value *SP)
+      : Addr(address), StackPtrResetLocation(SP) {}
+
+  llvm::Value *getAddressPointer() const { return Addr.getAddress(); }
+  Alignment getAlignment() const { return Addr.getAlignment(); }
+  Address getAddress() const { return Addr; }
+  bool needsSPRestore() const { return StackPtrResetLocation != nullptr; }
+  llvm::Value *getSavedSP() const {
+    assert(StackPtrResetLocation && "Expect a valid stacksave");
+    return StackPtrResetLocation;
+  }
+
+  bool isValid() const { return Addr.isValid(); }
+};
+
 } // end namespace irgen
 } // end namespace swift
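A StackAddress pairs a stack allocation with the llvm.stacksave token that must accompany a dynamic alloca emitted outside the entry block. A minimal usage sketch (assumed usage; it mirrors the allocateStack implementations later in this patch):

    // Entry block: nothing to restore on deallocation.
    StackAddress entryAddr(addr);
    assert(!entryAddr.needsSPRestore());

    // Any other block: remember the token returned by llvm.stacksave so
    // deallocation can pass it to llvm.stackrestore.
    StackAddress otherAddr(addr, savedSP);
    if (otherAddr.needsSPRestore())
      restoreStackPointer(otherAddr.getSavedSP()); // hypothetical helper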
diff --git a/lib/IRGen/AllocStackHoisting.cpp b/lib/IRGen/AllocStackHoisting.cpp
new file mode 100644
index 0000000000000..e04e3f0a1ca6c
--- /dev/null
+++ b/lib/IRGen/AllocStackHoisting.cpp
@@ -0,0 +1,430 @@
+//===--- AllocStackHoisting.cpp - Hoist alloc_stack instructions ----------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alloc-stack-hoisting"
+
+#include "swift/IRGen/IRGenSILPasses.h"
+#include "swift/SILOptimizer/Analysis/Analysis.h"
+#include "swift/SILOptimizer/PassManager/Passes.h"
+#include "swift/SILOptimizer/PassManager/Transforms.h"
+#include "swift/SIL/DebugUtils.h"
+#include "swift/SIL/SILBuilder.h"
+#include "swift/SIL/SILInstruction.h"
+#include "swift/SIL/SILArgument.h"
+
+#include "IRGenModule.h"
+#include "NonFixedTypeInfo.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace swift;
+
+llvm::cl::opt<bool> SILUseStackSlotMerging(
+    "sil-merge-stack-slots", llvm::cl::init(true),
+    llvm::cl::desc("Merge generic alloc_stack instructions"));
+
+/// Hoist generic alloc_stack instructions to the entry basic block and merge
+/// alloc_stack instructions if their users span non-overlapping live-ranges.
+///
+/// This helps avoid llvm.stacksave/stackrestore intrinsic calls during code
+/// generation. IRGen will emit a plain dynamic alloca instruction only if the
+/// alloc_stack is in the entry block; for alloc_stacks in all other basic
+/// blocks it emits a dynamic alloca plus llvm.stacksave/stackrestore calls.
+///
+/// Merging alloc_stack instructions saves code size and stack size.
+
+/// An alloc_stack instruction is hoistable if it is of generic type and the
+/// type parameter is not dependent on an opened type.
+static bool isHoistable(AllocStackInst *Inst, irgen::IRGenModule &Mod) {
+  auto SILTy = Inst->getType();
+  // We don't need to hoist types that have reference semantics: they are
+  // fixed size, so no dynamic alloca will be generated for them.
+  if (SILTy.hasReferenceSemantics())
+    return false;
+
+  // Only hoist types that are dynamically sized (generics and resilient
+  // types).
+  auto &TI = Mod.getTypeInfo(SILTy);
+  if (TI.isFixedSize())
+    return false;
+
+  // Don't hoist generics with opened archetypes. We would have to hoist the
+  // open archetype instruction which might not be possible.
+  if (!Inst->getTypeDependentOperands().empty())
+    return false;
+  return true;
+}
+
+/// A partition of alloc_stack instructions.
+///
+/// Initially, a partition contains alloc_stack instructions of one type.
+/// After merging non-overlapping alloc_stack live ranges, a partition
+/// contains a set of alloc_stack instructions that can be assigned a single
+/// stack location.
+namespace {
+class Partition {
+public:
+  SmallVector<AllocStackInst *, 4> Elts;
+
+  Partition(AllocStackInst *A) : Elts(1, A) {}
+  Partition() {}
+
+  /// Assign a single alloc_stack instruction to all the alloc_stacks in the
+  /// partition.
+  ///
+  /// This assumes that the live ranges of the alloc_stack instructions are
+  /// non-overlapping.
+  void assignStackLocation(SmallVectorImpl<SILInstruction *> &FunctionExits);
+};
+} // end anonymous namespace
+
+/// Erases all dealloc_stack users of an alloc_stack.
+static void eraseDeallocStacks(AllocStackInst *AllocStack) {
+  // Delete dealloc_stacks.
+  SmallVector<DeallocStackInst *, 16> DeallocStacksToDelete;
+  for (auto *U : AllocStack->getUses()) {
+    if (auto *DeallocStack = dyn_cast<DeallocStackInst>(U->getUser()))
+      DeallocStacksToDelete.push_back(DeallocStack);
+  }
+  for (auto *D : DeallocStacksToDelete)
+    D->eraseFromParent();
+}
+
+/// Inserts a dealloc_stack at all the function exits.
+static void
+insertDeallocStackAtEndOf(SmallVectorImpl<SILInstruction *> &FunctionExits,
+                          AllocStackInst *AllocStack) {
+  // Insert dealloc_stack in the exit blocks.
+  for (auto *Exit : FunctionExits) {
+    SILBuilder Builder(Exit);
+    Builder.createDeallocStack(AllocStack->getLoc(), AllocStack);
+  }
+}
+
+/// Assign a single alloc_stack instruction to all the alloc_stacks in the
+/// partition.
+void Partition::assignStackLocation(
+    SmallVectorImpl<SILInstruction *> &FunctionExits) {
+  assert(!Elts.empty() && "Must have at least one location");
+  // The assigned location is the first alloc_stack in our partition.
+  auto *AssignedLoc = Elts[0];
+
+  // Move this assigned location to the beginning of the entry block.
+  auto *EntryBB = AssignedLoc->getFunction()->getEntryBlock();
+  AssignedLoc->removeFromParent();
+  EntryBB->push_front(AssignedLoc);
+
+  // Erase the dealloc_stacks.
+  eraseDeallocStacks(AssignedLoc);
+
+  // Insert a new dealloc_stack at the exit(s) of the function.
+  insertDeallocStackAtEndOf(FunctionExits, AssignedLoc);
+
+  // Rewrite all the other alloc_stacks in the partition to use the assigned
+  // location.
+  for (auto *AllocStack : Elts) {
+    if (AssignedLoc == AllocStack) continue;
+    eraseDeallocStacks(AllocStack);
+    AllocStack->replaceAllUsesWith(AssignedLoc);
+    AllocStack->eraseFromParent();
+  }
+}
+
+/// Returns the single dealloc_stack user of the alloc_stack or nullptr
+/// otherwise.
+static SILInstruction *getSingleDeallocStack(AllocStackInst *ASI) {
+  SILInstruction *Dealloc = nullptr;
+  for (auto *U : ASI->getUses()) {
+    auto *Inst = U->getUser();
+    if (isa<DeallocStackInst>(Inst)) {
+      if (Dealloc == nullptr) {
+        Dealloc = Inst;
+        continue;
+      }
+      // Already saw a dealloc_stack.
+      return nullptr;
+    }
+  }
+  assert(Dealloc != nullptr);
+  return Dealloc;
+}
+
+namespace {
+/// Compute liveness for the partition to allow for an interference check
+/// between two alloc_stack instructions.
+///
+/// For now, no real liveness is computed; this just performs a simple check
+/// of whether the regions of two alloc_stack instructions might overlap.
+class Liveness {
+public:
+  Liveness(Partition &P) {}
+
+  /// Check whether the live ranges of the two alloc_stack instructions
+  /// might overlap.
+  ///
+  /// Currently this does not use a liveness analysis. Rather we check that
+  /// each alloc_stack has:
+  /// * a single dealloc_stack user
+  /// * the dealloc_stack in the same basic block as the alloc_stack
+  /// If the alloc_stack instructions are in different basic blocks we know
+  /// that the live-ranges can't overlap.
+  /// If they are in the same basic block we scan the basic block to determine
+  /// whether one dealloc_stack dominates the other alloc_stack. If this is
+  /// the case the live ranges can't overlap.
+  bool mayOverlap(AllocStackInst *A, AllocStackInst *B) {
+    assert(A != B);
+
+    // Check that we have a single dealloc_stack user in the same block.
+    auto *singleDeallocA = getSingleDeallocStack(A);
+    if (singleDeallocA == nullptr ||
+        singleDeallocA->getParent() != A->getParent())
+      return true;
+    auto *singleDeallocB = getSingleDeallocStack(B);
+    if (singleDeallocB == nullptr ||
+        singleDeallocB->getParent() != B->getParent())
+      return true;
+
+    // Different basic blocks.
+    if (A->getParent() != B->getParent())
+      return false;
+
+    bool ALive = false;
+    bool BLive = false;
+    for (auto &Inst : *A->getParent()) {
+      if (A == &Inst) {
+        ALive = true;
+      } else if (singleDeallocA == &Inst) {
+        ALive = false;
+      } else if (B == &Inst) {
+        BLive = true;
+      } else if (singleDeallocB == &Inst) {
+        BLive = false;
+      }
+
+      if (ALive && BLive)
+        return true;
+    }
+    return false;
+  }
+};
+} // end anonymous namespace
+
+namespace {
+/// Merge alloc_stack instructions.
+///
+/// This merges alloc_stack instructions of one type by:
+/// * building partitions of alloc_stack instructions of one type
+/// * merging alloc_stack instructions in each partition into one alloc_stack
+///   if the live ranges spanned by the alloc_stack users are known not to
+///   overlap.
+class MergeStackSlots {
+  /// Contains partitions of alloc_stack instructions by type.
+  SmallVector<Partition, 2> PartitionByType;
+  /// The function exits.
+  SmallVectorImpl<SILInstruction *> &FunctionExits;
+
+public:
+  MergeStackSlots(SmallVectorImpl<AllocStackInst *> &AllocStacks,
+                  SmallVectorImpl<SILInstruction *> &FuncExits);
+
+  /// Merge alloc_stack instructions if possible and hoist them to the entry
+  /// block.
+  void mergeSlots();
+};
+} // end anonymous namespace
+
+MergeStackSlots::MergeStackSlots(
+    SmallVectorImpl<AllocStackInst *> &AllocStacks,
+    SmallVectorImpl<SILInstruction *> &FuncExits)
+    : FunctionExits(FuncExits) {
+  // Build initial partitions based on the type.
+  llvm::DenseMap<SILType, unsigned> TypeToPartitionMap;
+  for (auto *AS : AllocStacks) {
+    auto Ty = AS->getType();
+    auto It = TypeToPartitionMap.find(Ty);
+    if (It != TypeToPartitionMap.end()) {
+      PartitionByType[It->second].Elts.push_back(AS);
+    } else {
+      PartitionByType.push_back(Partition(AS));
+      TypeToPartitionMap[Ty] = PartitionByType.size() - 1;
+    }
+  }
+}
+
+/// Merge alloc_stack instructions if possible and hoist them to the entry
+/// block.
+void MergeStackSlots::mergeSlots() {
+  for (auto &PartitionOfOneType : PartitionByType) {
+    Liveness Live(PartitionOfOneType);
+
+    // Partitions that are known to contain non-overlapping alloc_stack
+    // live-ranges.
+    SmallVector<Partition, 2> DisjointPartitions(1, Partition());
+
+    // Look at all the alloc_stacks of one type.
+    for (auto *CurAllocStack : PartitionOfOneType.Elts) {
+      bool FoundAPartition = false;
+      // Check if we can add it to an existing partition that we have shown
+      // to be non-interfering.
+      for (auto &CandidateP : DisjointPartitions) {
+        // If the candidate partition is empty (the very first time we look
+        // at an alloc_stack) we can just add the alloc_stack.
+        if (CandidateP.Elts.empty()) {
+          CandidateP.Elts.push_back(CurAllocStack);
+          FoundAPartition = true;
+          break;
+        }
+        // Otherwise, we check interference of the current alloc_stack with
+        // the candidate partition.
+        bool InterferesWithCandidateP = false;
+        for (auto *AllocStackInPartition : CandidateP.Elts) {
+          if (Live.mayOverlap(AllocStackInPartition, CurAllocStack)) {
+            InterferesWithCandidateP = true;
+            break;
+          }
+        }
+        // No interference: add the current alloc_stack to the candidate
+        // partition.
+        if (!InterferesWithCandidateP) {
+          CandidateP.Elts.push_back(CurAllocStack);
+          FoundAPartition = true;
+          break;
+        }
+        // Otherwise, we look at the next partition.
+      }
+      // If no partition was found add a new one.
+      if (!FoundAPartition) {
+        DisjointPartitions.push_back(Partition(CurAllocStack));
+      }
+    }
+
+    // Assign stack locations to the disjoint partitions, hoisting
+    // alloc_stacks to the entry block at the same time.
+    for (auto &Par : DisjointPartitions) {
+      Par.assignStackLocation(FunctionExits);
+    }
+  }
+}
+
+namespace {
+/// Hoist alloc_stack instructions to the entry block and merge them.
+class HoistAllocStack {
+  /// The function to process.
+  SILFunction *F;
+  /// The current IRGenModule.
+  irgen::IRGenModule &IRGenMod;
+
+  SmallVector<AllocStackInst *, 16> AllocStackToHoist;
+  SmallVector<SILInstruction *, 8> FunctionExits;
+
+public:
+  HoistAllocStack(SILFunction *F, irgen::IRGenModule &Mod)
+      : F(F), IRGenMod(Mod) {}
+
+  /// Try to hoist generic alloc_stack instructions to the entry block.
+  /// Returns true if the function was changed.
+  bool run();
+
+private:
+  /// Collect generic alloc_stack instructions that can be moved to the entry
+  /// block.
+  void collectHoistableInstructions();
+
+  /// Move the hoistable alloc_stack instructions to the entry block.
+  void hoist();
+};
+} // end anonymous namespace
+
+/// Collect generic alloc_stack instructions in the current function that can
+/// be hoisted.
+/// We can hoist a generic alloc_stack instruction if it is not dependent on
+/// another instruction that we would also have to hoist.
+/// A generic alloc_stack could reference an opened archetype that was not
+/// opened in the entry block.
+void HoistAllocStack::collectHoistableInstructions() {
+  for (auto &BB : *F) {
+    for (auto &Inst : BB) {
+      // Terminators that are function exits are our dealloc_stack
+      // insertion points.
+      if (auto *Term = dyn_cast<TermInst>(&Inst)) {
+        if (Term->isFunctionExiting())
+          FunctionExits.push_back(Term);
+        continue;
+      }
+
+      auto *ASI = dyn_cast<AllocStackInst>(&Inst);
+      if (!ASI) {
+        continue;
+      }
+      if (isHoistable(ASI, IRGenMod)) {
+        DEBUG(llvm::dbgs() << "Hoisting " << Inst);
+        AllocStackToHoist.push_back(ASI);
+      } else {
+        DEBUG(llvm::dbgs() << "Not hoisting " << Inst);
+      }
+    }
+  }
+}
+
+/// Hoist the alloc_stack instructions to the entry block and sink the
+/// dealloc_stack instructions to the function exits.
+void HoistAllocStack::hoist() {
+  if (SILUseStackSlotMerging) {
+    MergeStackSlots Merger(AllocStackToHoist, FunctionExits);
+    Merger.mergeSlots();
+  } else {
+    // Hoist alloc_stacks to the entry block and delete dealloc_stacks.
+    auto *EntryBB = F->getEntryBlock();
+    for (auto *AllocStack : AllocStackToHoist) {
+      // Insert at the beginning of the entry block.
+      AllocStack->removeFromParent();
+      EntryBB->push_front(AllocStack);
+      // Delete dealloc_stacks.
+      eraseDeallocStacks(AllocStack);
+    }
+    // Insert dealloc_stack in the exit blocks.
+    for (auto *AllocStack : AllocStackToHoist) {
+      insertDeallocStackAtEndOf(FunctionExits, AllocStack);
+    }
+  }
+}
+
+/// Try to hoist generic alloc_stack instructions to the entry block.
+/// Returns true if the function was changed.
+bool HoistAllocStack::run() {
+  collectHoistableInstructions();
+
+  // Nothing to hoist?
+  if (AllocStackToHoist.empty())
+    return false;
+
+  hoist();
+  return true;
+}
+
+namespace {
+class AllocStackHoisting : public SILFunctionTransform {
+  void run() override {
+    auto *F = getFunction();
+    auto *Mod = getIRGenModule();
+    assert(Mod && "This pass must be run as part of an IRGen pipeline");
+    bool Changed = HoistAllocStack(F, *Mod).run();
+    if (Changed) {
+      PM->invalidateAnalysis(F, SILAnalysis::InvalidationKind::Instructions);
+    }
+  }
+  StringRef getName() override { return "alloc_stack Hoisting"; }
+};
+} // end anonymous namespace
+
+SILFunctionTransform *irgen::createAllocStackHoisting() {
+  return new AllocStackHoisting();
+}
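For illustration, the pass's effect on hypothetical SIL (invented names; not taken from the test suite). Merging can be disabled with the new flag, e.g. swiftc -Xllvm -sil-merge-stack-slots=false (illustrative invocation). Before, a generic alloc_stack in a non-entry block forces IRGen to bracket the dynamic alloca with stacksave/stackrestore:

    bb1:                          // possibly visited many times
      %t = alloc_stack $T
      // ... use %t ...
      dealloc_stack %t : $*T
      cond_br %c, bb1, bbExit

After hoisting, the slot lives in the entry block and is deallocated once at each function exit, so a single plain dynamic alloca suffices:

    bb0(%0 : $*T):
      %t = alloc_stack $T
      br bb1
    // ...
    bbExit:
      dealloc_stack %t : $*T
      return %r : $()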
diff --git a/lib/IRGen/CMakeLists.txt b/lib/IRGen/CMakeLists.txt
index 22723d0cd5ea1..3749f7cd320e3 100644
--- a/lib/IRGen/CMakeLists.txt
+++ b/lib/IRGen/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_swift_library(swiftIRGen STATIC
+  AllocStackHoisting.cpp
   DebugTypeInfo.cpp
   EnumPayload.cpp
   ExtraInhabitants.cpp
@@ -45,6 +46,7 @@ add_swift_library(swiftIRGen STATIC
     swiftLLVMPasses
     swiftSIL
     swiftSILGen
+    swiftSILOptimizer
 
     # Clang dependencies.
     # FIXME: Clang should really export these in some reasonable manner.
diff --git a/lib/IRGen/FixedTypeInfo.h b/lib/IRGen/FixedTypeInfo.h
index b21ed7e5d13bf..1a10dc63693fa 100644
--- a/lib/IRGen/FixedTypeInfo.h
+++ b/lib/IRGen/FixedTypeInfo.h
@@ -76,10 +76,10 @@ class FixedTypeInfo : public TypeInfo {
     return (isFixedSize(expansion) && StorageSize.isZero());
   }
 
-  ContainedAddress allocateStack(IRGenFunction &IGF, SILType T,
-                                 const llvm::Twine &name) const override;
-  void deallocateStack(IRGenFunction &IGF, Address addr, SILType T) const override;
-  void destroyStack(IRGenFunction &IGF, Address addr, SILType T) const override;
+  StackAddress allocateStack(IRGenFunction &IGF, SILType T, bool isEntryBlock,
+                             const llvm::Twine &name) const override;
+  void deallocateStack(IRGenFunction &IGF, StackAddress addr, SILType T) const override;
+  void destroyStack(IRGenFunction &IGF, StackAddress addr, SILType T) const override;
 
   // We can give these reasonable default implementations.
 
diff --git a/lib/IRGen/GenCall.cpp b/lib/IRGen/GenCall.cpp
index 2b3320ac32398..f7e0608275c06 100644
--- a/lib/IRGen/GenCall.cpp
+++ b/lib/IRGen/GenCall.cpp
@@ -1214,15 +1214,15 @@ void CallEmission::emitToExplosion(Explosion &out) {
   // If the call is naturally to memory, emit it that way and then
   // explode that temporary.
   if (LastArgWritten == 1) {
-    ContainedAddress ctemp = substResultTI.allocateStack(IGF, substResultType,
-                                                         "call.aggresult");
+    StackAddress ctemp = substResultTI.allocateStack(IGF, substResultType,
+                                                     false, "call.aggresult");
     Address temp = ctemp.getAddress();
     emitToMemory(temp, substResultTI);
 
     // We can use a take.
     substResultTI.loadAsTake(IGF, temp, out);
 
-    substResultTI.deallocateStack(IGF, ctemp.getContainer(), substResultType);
+    substResultTI.deallocateStack(IGF, ctemp, substResultType);
     return;
   }
 
@@ -1381,7 +1381,7 @@ static void emitCoerceAndExpand(IRGenFunction &IGF,
 
   // Otherwise, materialize to a temporary.
   Address temporary =
-    paramTI.allocateStack(IGF, paramTy, "coerce-and-expand.temp").getAddress();
+    paramTI.allocateStack(IGF, paramTy, false, "coerce-and-expand.temp").getAddress();
 
   auto coercionTyLayout = IGF.IGM.DataLayout.getStructLayout(coercionTy);
 
@@ -1440,7 +1440,7 @@ static void emitCoerceAndExpand(IRGenFunction &IGF,
     paramTI.loadAsTake(IGF, temporary, out);
   }
 
-  paramTI.deallocateStack(IGF, temporary, paramTy);
+  paramTI.deallocateStack(IGF, StackAddress(temporary), paramTy);
 }
 
 static void emitDirectExternalArgument(IRGenFunction &IGF,
@@ -1476,6 +1476,7 @@ static void emitDirectExternalArgument(IRGenFunction &IGF,
 
   // Store to a temporary.
   Address temporary = argTI.allocateStack(IGF, argType,
+                                          false,
                                           "coerced-arg").getAddress();
   argTI.initializeFromParams(IGF, in, temporary, argType);
 
@@ -1497,7 +1498,7 @@ static void emitDirectExternalArgument(IRGenFunction &IGF,
 
   out.add(IGF.Builder.CreateLoad(coercedAddr));
 
-  argTI.deallocateStack(IGF, temporary, argType);
+  argTI.deallocateStack(IGF, StackAddress(temporary), argType);
 }
 
 namespace {
@@ -1547,7 +1548,7 @@ static void emitClangExpandedArgument(IRGenFunction &IGF,
   }
 
   // Otherwise, materialize to a temporary.
-  Address temp = swiftTI.allocateStack(IGF, swiftType,
+  Address temp = swiftTI.allocateStack(IGF, swiftType, false,
                                        "clang-expand-arg.temp").getAddress();
   swiftTI.initialize(IGF, in, temp);
 
@@ -1569,7 +1570,7 @@ void irgen::emitClangExpandedParameter(IRGenFunction &IGF,
   }
 
   // Otherwise, materialize to a temporary.
-  Address temp = swiftTI.allocateStack(IGF, swiftType,
+  Address temp = swiftTI.allocateStack(IGF, swiftType, false,
                                        "clang-expand-param.temp").getAddress();
   Address castTemp = IGF.Builder.CreateBitCast(temp, IGF.IGM.Int8PtrTy);
   ClangExpandStoreEmitter(IGF, in).visit(clangType, castTemp);
 
@@ -1652,7 +1653,7 @@ static void externalizeArguments(IRGenFunction &IGF, const Callee &callee,
     }
     case clang::CodeGen::ABIArgInfo::Indirect: {
       auto &ti = cast<LoadableTypeInfo>(IGF.getTypeInfo(paramType));
-      Address addr = ti.allocateStack(IGF, paramType,
+      Address addr = ti.allocateStack(IGF, paramType, false,
                                       "indirect-temporary").getAddress();
       ti.initialize(IGF, in, addr);
 
@@ -1779,7 +1780,7 @@ static void emitDirectForeignParameter(IRGenFunction &IGF,
 
   // Deallocate the temporary.
   // `deallocateStack` emits the lifetime.end marker for us.
-  paramTI.deallocateStack(IGF, temporary, paramType);
+  paramTI.deallocateStack(IGF, StackAddress(temporary), paramType);
 }
 
 void irgen::emitForeignParameter(IRGenFunction &IGF, Explosion &params,
diff --git a/lib/IRGen/GenFunc.cpp b/lib/IRGen/GenFunc.cpp
index e6626e0c309e5..e531211236d0b 100644
--- a/lib/IRGen/GenFunc.cpp
+++ b/lib/IRGen/GenFunc.cpp
@@ -778,7 +778,7 @@ static llvm::Function *emitPartialApplicationForwarder(IRGenModule &IGM,
   struct AddressToDeallocate {
     SILType Type;
     const TypeInfo &TI;
-    Address Addr;
+    StackAddress Addr;
   };
   SmallVector<AddressToDeallocate, 4> addressesToDeallocate;
 
@@ -958,14 +958,14 @@ static llvm::Function *emitPartialApplicationForwarder(IRGenModule &IGM,
       // The +1 argument is passed indirectly, so we need to copy into a
      // temporary.
       needsAllocas = true;
-      auto caddr = fieldTI.allocateStack(subIGF, fieldTy, "arg.temp");
-      fieldTI.initializeWithCopy(subIGF, caddr.getAddress(), fieldAddr,
-                                 fieldTy);
-      param.add(caddr.getAddressPointer());
+      auto stackAddr = fieldTI.allocateStack(subIGF, fieldTy, false, "arg.temp");
+      auto addressPointer = stackAddr.getAddress().getAddress();
+      fieldTI.initializeWithCopy(subIGF, stackAddr.getAddress(), fieldAddr, fieldTy);
+      param.add(addressPointer);
 
       // Remember to deallocate later.
       addressesToDeallocate.push_back(
-        AddressToDeallocate{fieldTy, fieldTI, caddr.getContainer()});
+        AddressToDeallocate{fieldTy, fieldTI, stackAddr});
 
       break;
     }
diff --git a/lib/IRGen/GenInit.cpp b/lib/IRGen/GenInit.cpp
index 478edca8d2adf..94c084f8e8dca 100644
--- a/lib/IRGen/GenInit.cpp
+++ b/lib/IRGen/GenInit.cpp
@@ -63,30 +63,31 @@ Address IRGenModule::emitSILGlobalVariable(SILGlobalVariable *var) {
   return addr;
 }
 
-ContainedAddress FixedTypeInfo::allocateStack(IRGenFunction &IGF, SILType T,
-                                              const Twine &name) const {
+StackAddress FixedTypeInfo::allocateStack(IRGenFunction &IGF, SILType T,
+                                          bool isEntryBlock,
+                                          const Twine &name) const {
   // If the type is known to be empty, don't actually allocate anything.
   if (isKnownEmpty(ResilienceExpansion::Maximal)) {
     auto addr = getUndefAddress();
-    return { addr, addr };
+    return { addr };
   }
 
   Address alloca =
     IGF.createAlloca(getStorageType(), getFixedAlignment(), name);
   IGF.Builder.CreateLifetimeStart(alloca, getFixedSize());
 
-  return { alloca, alloca };
+  return { alloca };
 }
 
-void FixedTypeInfo::destroyStack(IRGenFunction &IGF, Address addr,
+void FixedTypeInfo::destroyStack(IRGenFunction &IGF, StackAddress addr,
                                  SILType T) const {
-  destroy(IGF, addr, T);
+  destroy(IGF, addr.getAddress(), T);
   FixedTypeInfo::deallocateStack(IGF, addr, T);
 }
 
-void FixedTypeInfo::deallocateStack(IRGenFunction &IGF, Address addr,
+void FixedTypeInfo::deallocateStack(IRGenFunction &IGF, StackAddress addr,
                                     SILType T) const {
   if (isKnownEmpty(ResilienceExpansion::Maximal))
     return;
-  IGF.Builder.CreateLifetimeEnd(addr, getFixedSize());
+  IGF.Builder.CreateLifetimeEnd(addr.getAddress(), getFixedSize());
 }
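The updated stack-allocation protocol, as a minimal sketch (the parameter and type names come from this patch; the surrounding code is invented). Mid-function temporaries pass false for isEntryBlock and must hand the returned StackAddress back to deallocateStack so a stackrestore can be emitted when one is needed:

    // Sketch of the protocol used by the GenCall.cpp call sites above.
    StackAddress tmp = TI.allocateStack(IGF, T, /*isEntryBlock=*/false, "temp");
    Address addr = tmp.getAddress();
    // ... initialize and use addr ...
    TI.deallocateStack(IGF, tmp, T);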
diff --git a/lib/IRGen/GenOpaque.cpp b/lib/IRGen/GenOpaque.cpp
index 9e68e9131d07d..d1b0fc8bcab38 100644
--- a/lib/IRGen/GenOpaque.cpp
+++ b/lib/IRGen/GenOpaque.cpp
@@ -435,6 +435,44 @@ llvm::Value *irgen::emitInitializeBufferWithCopyOfBufferCall(IRGenFunction &IGF,
   return call;
 }
 
+/// Emit a dynamic alloca call to allocate enough memory to hold an object of
+/// type 'T' and an optional llvm.stackrestore point if 'isInEntryBlock' is
+/// false.
+DynamicAlloca irgen::emitDynamicAlloca(IRGenFunction &IGF, SILType T,
+                                       bool isInEntryBlock) {
+  llvm::Value *stackRestorePoint = nullptr;
+
+  // Save the stack pointer if we are not in the entry block (we could be
+  // executed more than once).
+  if (!isInEntryBlock) {
+    auto *stackSaveFn = llvm::Intrinsic::getDeclaration(
+        &IGF.IGM.Module, llvm::Intrinsic::ID::stacksave);
+
+    stackRestorePoint = IGF.Builder.CreateCall(stackSaveFn, {}, "spsave");
+  }
+
+  // Emit the dynamic alloca.
+  llvm::Value *size = emitLoadOfSize(IGF, T);
+  auto *alloca = IGF.Builder.CreateAlloca(IGF.IGM.Int8Ty, size, "alloca");
+  alloca->setAlignment(16);
+  assert((!isInEntryBlock ||
+          IGF.getActiveDominancePoint().isUniversal()) &&
+         "Must be in entry block if we insert dynamic alloca's without "
+         "stackrestores");
+  return {alloca, stackRestorePoint};
+}
+
+/// Deallocate dynamic alloca's memory if requested by restoring the stack
+/// location before the dynamic alloca's call.
+void irgen::emitDeallocateDynamicAlloca(IRGenFunction &IGF,
+                                        StackAddress address) {
+  if (!address.needsSPRestore())
+    return;
+  auto *stackRestoreFn = llvm::Intrinsic::getDeclaration(
+      &IGF.IGM.Module, llvm::Intrinsic::ID::stackrestore);
+  IGF.Builder.CreateCall(stackRestoreFn, address.getSavedSP());
+}
+
 /// Emit a call to do an 'allocateBuffer' operation.
 llvm::Value *irgen::emitAllocateBufferCall(IRGenFunction &IGF,
                                            SILType T,
diff --git a/lib/IRGen/GenOpaque.h b/lib/IRGen/GenOpaque.h
index 921f9c17fac72..9815b68c2affa 100644
--- a/lib/IRGen/GenOpaque.h
+++ b/lib/IRGen/GenOpaque.h
@@ -230,6 +230,21 @@ namespace irgen {
   /// The type must be dynamically known to have extra inhabitant witnesses.
   llvm::Value *emitLoadOfExtraInhabitantCount(IRGenFunction &IGF, SILType T);
 
+  /// Emit a dynamic alloca call to allocate enough memory to hold an object
+  /// of type 'T' and an optional llvm.stackrestore point if 'isInEntryBlock'
+  /// is false.
+  struct DynamicAlloca {
+    llvm::Value *Alloca;
+    llvm::Value *SavedSP;
+    DynamicAlloca(llvm::Value *A, llvm::Value *SP) : Alloca(A), SavedSP(SP) {}
+  };
+  DynamicAlloca emitDynamicAlloca(IRGenFunction &IGF, SILType T,
+                                  bool isInEntryBlock);
+
+  /// Deallocate dynamic alloca's memory if the stack address has an SP
+  /// restore point associated with it.
+  void emitDeallocateDynamicAlloca(IRGenFunction &IGF, StackAddress address);
+
 } // end namespace irgen
 } // end namespace swift
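Illustrative LLVM IR for the non-entry-block path of emitDynamicAlloca and its matching emitDeallocateDynamicAlloca (value names invented; pre-opaque-pointer syntax):

    %spsave = call i8* @llvm.stacksave()
    %size   = ptrtoint i8* %size.witness to i64   ; emitLoadOfSize
    %obj    = alloca i8, i64 %size, align 16
    ; ... use %obj ...
    call void @llvm.stackrestore(i8* %spsave)

In the entry block the stacksave/stackrestore pair is omitted, which is exactly the situation AllocStackHoisting tries to create.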
diff --git a/lib/IRGen/IRGen.cpp b/lib/IRGen/IRGen.cpp
index d7c94b43f7405..3402e05c08133 100644
--- a/lib/IRGen/IRGen.cpp
+++ b/lib/IRGen/IRGen.cpp
@@ -27,8 +27,13 @@
 #include "swift/Basic/Timer.h"
 #include "swift/Basic/Version.h"
 #include "swift/ClangImporter/ClangImporter.h"
+#include "swift/IRGen/IRGenPublic.h"
+#include "swift/IRGen/IRGenSILPasses.h"
 #include "swift/LLVMPasses/PassesFwd.h"
 #include "swift/LLVMPasses/Passes.h"
+#include "swift/SILOptimizer/PassManager/Passes.h"
+#include "swift/SILOptimizer/PassManager/PassManager.h"
+#include "swift/SILOptimizer/PassManager/PassPipeline.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/Bitcode/ReaderWriter.h"
@@ -596,6 +601,45 @@ static void initLLVMModule(const IRGenModule &IGM) {
   Module->setDataLayout(IGM.DataLayout.getStringRepresentation());
 }
 
+std::pair<IRGenerator *, IRGenModule *>
+swift::irgen::createIRGenModule(SILModule *SILMod,
+                                llvm::LLVMContext &LLVMContext) {
+  IRGenOptions Opts;
+  IRGenerator *irgen = new IRGenerator(Opts, *SILMod);
+  auto targetMachine = irgen->createTargetMachine();
+  if (!targetMachine)
+    return std::make_pair(nullptr, nullptr);
+
+  // Create the IR emitter.
+  IRGenModule *IGM =
+      new IRGenModule(*irgen, std::move(targetMachine), nullptr, LLVMContext,
+                      "", Opts.getSingleOutputFilename());
+
+  initLLVMModule(*IGM);
+
+  return std::pair<IRGenerator *, IRGenModule *>(irgen, IGM);
+}
+
+void swift::irgen::deleteIRGenModule(
+    std::pair<IRGenerator *, IRGenModule *> &IRGenPair) {
+  delete IRGenPair.second;
+  delete IRGenPair.first;
+}
+
+/// \brief Run the IRGen preparation SIL pipeline. Passes have access to the
+/// IRGenModule.
+static void runIRGenPreparePasses(SILModule &Module,
+                                  irgen::IRGenModule &IRModule) {
+  SILPassManager PM(&Module, &IRModule);
+#define PASS(ID, Name, Description)
+#define IRGEN_PASS(ID, Name, Description) \
+  PM.registerIRGenPass(swift::PassKind::ID, irgen::create##ID());
+#include "swift/SILOptimizer/PassManager/Passes.def"
+  PM.executePassPipelinePlan(
+      SILPassPipelinePlan::getIRGenPreparePassPipeline());
+}
+
 /// Generates LLVM IR, runs the LLVM passes and produces the output file.
 /// All this is done in a single thread.
 static std::unique_ptr<llvm::Module> performIRGeneration(IRGenOptions &Opts,
@@ -619,6 +663,9 @@ static std::unique_ptr<llvm::Module> performIRGeneration(IRGenOptions &Opts,
                   LLVMContext, ModuleName, Opts.getSingleOutputFilename());
 
   initLLVMModule(IGM);
+
+  // Run SIL level IRGen preparation passes.
+  runIRGenPreparePasses(*SILMod, IGM);
 
   {
     SharedTimer timer("IRGen");
@@ -769,6 +816,7 @@ static void performParallelIRGeneration(IRGenOptions &Opts,
   auto &Ctx = M->getASTContext();
 
   // Create an IRGenModule for each source file.
+  bool DidRunSILCodeGenPreparePasses = false;
   for (auto *File : M->getFiles()) {
     auto nextSF = dyn_cast<SourceFile>(File);
     if (!nextSF || nextSF->ASTStage < SourceFile::TypeChecked)
@@ -796,6 +844,12 @@ static void performParallelIRGeneration(IRGenOptions &Opts,
       IGMcreated = true;
 
       initLLVMModule(*IGM);
+      if (!DidRunSILCodeGenPreparePasses) {
+        // Run SIL level IRGen preparation passes on the module the first time
+        // around.
+        runIRGenPreparePasses(*SILMod, *IGM);
+        DidRunSILCodeGenPreparePasses = true;
+      }
     }
 
     if (!IGMcreated) {
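A sketch of how the two new public entry points are meant to be paired by a hypothetical out-of-tree caller (the patch itself only defines them):

    llvm::LLVMContext Ctx;
    auto modPair = irgen::createIRGenModule(silModule, Ctx);
    if (modPair.first) {
      // ... use modPair.second, the IRGenModule ...
      irgen::deleteIRGenModule(modPair);
    }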
diff --git a/lib/IRGen/IRGenSIL.cpp b/lib/IRGen/IRGenSIL.cpp
index a01ec0f295d29..b2c5416f0a3a6 100644
--- a/lib/IRGen/IRGenSIL.cpp
+++ b/lib/IRGen/IRGenSIL.cpp
@@ -108,14 +108,24 @@ class StaticFunction {
 class LoweredValue {
 public:
   enum class Kind {
-    /// This LoweredValue corresponds to a SIL address value.
-    /// The LoweredValue of an alloc_stack keeps an owning container in
-    /// addition to the address of the allocated buffer.
+    /// The first two LoweredValue kinds correspond to a SIL address value.
+    ///
+    /// The LoweredValue of an existential alloc_stack keeps an owning container
+    /// in addition to the address of the allocated buffer.
     /// Depending on the allocated type, the container may be equal to the
     /// buffer itself (for types with known sizes) or it may be the address
     /// of a fixed-size container which points to the heap-allocated buffer.
     /// In this case the address-part may be null, which means that the buffer
     /// is not allocated yet.
+    ContainedAddress,
+
+    /// The LoweredValue of a resilient, generic, or loadable typed alloc_stack
+    /// keeps an optional stackrestore point in addition to the address of the
+    /// allocated buffer. For all other address values the stackrestore point
+    /// is just null.
+    /// If the stackrestore point is set (currently, this might happen for
+    /// opaque types: generic and resilient) the deallocation of the stack must
+    /// reset the stack pointer to this point.
     Address,
 
     /// The following kinds correspond to SIL non-address values.
@@ -142,7 +152,8 @@ class LoweredValue {
   using ExplosionVector = SmallVector<llvm::Value *, 4>;
 
   union {
-    ContainedAddress address;
+    ContainedAddress containedAddress;
+    StackAddress address;
     OwnedAddress boxWithAddress;
     struct {
       ExplosionVector values;
@@ -153,9 +164,14 @@ class LoweredValue {
 
 public:
 
-  /// Create an address value without a container (the usual case).
+  /// Create an address value without a stack restore point.
   LoweredValue(const Address &address)
-    : kind(Kind::Address), address(Address(), address)
+    : kind(Kind::Address), address(address)
+  {}
+
+  /// Create an address value with an optional stack restore point.
+  LoweredValue(const StackAddress &address)
+    : kind(Kind::Address), address(address)
   {}
 
   enum ContainerForUnallocatedAddress_t { ContainerForUnallocatedAddress };
 
@@ -163,13 +179,13 @@ class LoweredValue {
   /// Create an address value for an alloc_stack, consisting of a container and
   /// a not yet allocated buffer.
   LoweredValue(const Address &container, ContainerForUnallocatedAddress_t)
-    : kind(Kind::Address), address(container, Address())
+    : kind(Kind::ContainedAddress), containedAddress(container, Address())
   {}
 
   /// Create an address value for an alloc_stack, consisting of a container and
   /// the address of the allocated buffer.
   LoweredValue(const ContainedAddress &address)
-    : kind(Kind::Address), address(address)
+    : kind(Kind::ContainedAddress), containedAddress(address)
   {}
 
   LoweredValue(StaticFunction &&staticFunction)
@@ -194,8 +210,11 @@ class LoweredValue {
     : kind(lv.kind)
   {
     switch (kind) {
+    case Kind::ContainedAddress:
+      ::new (&containedAddress) ContainedAddress(std::move(lv.containedAddress));
+      break;
     case Kind::Address:
-      ::new (&address) ContainedAddress(std::move(lv.address));
+      ::new (&address) StackAddress(std::move(lv.address));
       break;
     case Kind::Explosion:
       ::new (&explosion.values) ExplosionVector(std::move(lv.explosion.values));
@@ -223,7 +242,8 @@ class LoweredValue {
     return kind == Kind::Address && address.getAddress().isValid();
   }
   bool isUnallocatedAddressInBuffer() const {
-    return kind == Kind::Address && !address.getAddress().isValid();
+    return kind == Kind::ContainedAddress &&
+           !containedAddress.getAddress().isValid();
   }
   bool isValue() const {
     return kind >= Kind::Value_First && kind <= Kind::Value_Last;
   }
@@ -236,11 +256,16 @@ class LoweredValue {
     assert(isAddress() && "not an allocated address");
     return address.getAddress();
   }
+
+  StackAddress getStackAddress() const {
+    assert(isAddress() && "not an allocated address");
+    return address;
+  }
 
   Address getContainerOfAddress() const {
-    assert(kind == Kind::Address);
-    assert(address.getContainer().isValid() && "address has no container");
-    return address.getContainer();
+    assert(kind == Kind::ContainedAddress);
+    assert(containedAddress.getContainer().isValid() && "address has no container");
+    return containedAddress.getContainer();
   }
 
   void getExplosion(IRGenFunction &IGF, Explosion &ex) const;
@@ -271,7 +296,10 @@ class LoweredValue {
   ~LoweredValue() {
     switch (kind) {
     case Kind::Address:
-      address.~ContainedAddress();
+      address.~StackAddress();
+      break;
+    case Kind::ContainedAddress:
+      containedAddress.~ContainedAddress();
       break;
     case Kind::Explosion:
       explosion.values.~ExplosionVector();
@@ -326,28 +354,6 @@ class IRGenSILFunction :
   unsigned NumAnonVars = 0;
   unsigned NumCondFails = 0;
 
-  /// Notes about instructions for which we're supposed to perform some
-  /// sort of non-standard emission. This enables some really simply local
-  /// peepholing in cases where you can't just do that with the lowered value.
-  ///
-  /// Since emission notes generally change semantics, we enforce that all
-  /// notes must be claimed.
-  ///
-  /// This uses a set because the current peepholes don't need to record any
-  /// extra structure; if you need extra structure, feel free to make it a
-  /// map. This set is generally very small because claiming a note removes
-  /// it.
-  llvm::SmallPtrSet<SILInstruction *, 4> EmissionNotes;
-
-  void addEmissionNote(SILInstruction *inst) {
-    assert(inst);
-    EmissionNotes.insert(inst);
-  }
-
-  bool claimEmissionNote(SILInstruction *inst) {
-    return EmissionNotes.erase(inst);
-  }
-
   /// Accumulative amount of allocated bytes on the stack. Used to limit the
   /// size for stack promoted objects.
   /// We calculate it on demand, so that we don't have to do it if the
@@ -386,7 +392,7 @@ class IRGenSILFunction :
     setLoweredValue(v, address);
   }
 
-  void setLoweredContainedAddress(SILValue v, const ContainedAddress &address) {
+  void setLoweredStackAddress(SILValue v, const StackAddress &address) {
     assert(v->getType().isAddress() && "address for non-address value?!");
     setLoweredValue(v, address);
   }
@@ -508,6 +514,11 @@ class IRGenSILFunction :
   Address getLoweredAddress(SILValue v) {
     return getLoweredValue(v).getAddress();
   }
+
+  StackAddress getLoweredStackAddress(SILValue v) {
+    return getLoweredValue(v).getStackAddress();
+  }
+
   Address getLoweredContainerOfAddress(SILValue v) {
     return getLoweredValue(v).getContainerOfAddress();
   }
@@ -952,6 +963,7 @@ llvm::Value *StaticFunction::getExplosionValue(IRGenFunction &IGF) const {
 void LoweredValue::getExplosion(IRGenFunction &IGF, Explosion &ex) const {
   switch (kind) {
   case Kind::Address:
+  case Kind::ContainedAddress:
     llvm_unreachable("not a value");
 
   case Kind::Explosion:
@@ -976,6 +988,7 @@ void LoweredValue::getExplosion(IRGenFunction &IGF, Explosion &ex) const {
 llvm::Value *LoweredValue::getSingletonExplosion(IRGenFunction &IGF) const {
   switch (kind) {
   case Kind::Address:
+  case Kind::ContainedAddress:
     llvm_unreachable("not a value");
 
   case Kind::Explosion:
@@ -1456,8 +1469,6 @@ void IRGenSILFunction::emitSILFunction() {
     if (!visitedBlocks.count(&bb))
       LoweredBBs[&bb].bb->eraseFromParent();
 
-  assert(EmissionNotes.empty() &&
-         "didn't claim emission notes for all instructions!");
 }
 
 void IRGenSILFunction::estimateStackSize() {
@@ -1560,8 +1571,6 @@ void IRGenSILFunction::visitSILBasicBlock(SILBasicBlock *BB) {
     }
 
     visit(&I);
-    assert(!EmissionNotes.count(&I) &&
-           "didn't claim emission note for instruction!");
   }
 
   assert(Builder.hasPostTerminatorIP() && "SIL bb did not terminate block?!");
@@ -1943,6 +1952,7 @@ static CallEmission getCallEmissionForLoweredValue(IRGenSILFunction &IGF,
   case LoweredValue::Kind::BoxWithAddress:
     llvm_unreachable("@box isn't a valid callee");
 
+  case LoweredValue::Kind::ContainedAddress:
   case LoweredValue::Kind::Address:
     llvm_unreachable("sil address isn't a valid callee");
   }
@@ -2095,6 +2105,7 @@ getPartialApplicationFunction(IRGenSILFunction &IGF, SILValue v,
   auto fnType = v->getType().castTo<SILFunctionType>();
 
   switch (lv.kind) {
+  case LoweredValue::Kind::ContainedAddress:
   case LoweredValue::Kind::Address:
     llvm_unreachable("can't partially apply an address");
   case LoweredValue::Kind::BoxWithAddress:
@@ -3517,19 +3528,15 @@ void IRGenSILFunction::visitAllocStackInst(swift::AllocStackInst *i) {
 # endif
   (void) Decl;
 
-  // If a dynamic alloc_stack is immediately initialized by a copy_addr
-  // operation, we can combine the allocation and initialization using an
-  // optimized value witness.
-  if (tryDeferFixedSizeBufferInitialization(*this, i, type, Address(), dbgname))
-    return;
-
-  auto addr = type.allocateStack(*this,
-                                 i->getElementType(),
-                                 dbgname);
+  bool isEntryBlock =
+      i->getParentBlock() == i->getFunction()->getEntryBlock();
+  auto addr =
+      type.allocateStack(*this, i->getElementType(), isEntryBlock, dbgname);
 
   emitDebugInfoForAllocStack(i, type, addr.getAddress().getAddress());
 
-  setLoweredContainedAddress(i, addr);
+  setLoweredStackAddress(i, addr);
 }
 
 static void
@@ -3584,15 +3591,9 @@ void IRGenSILFunction::visitAllocRefDynamicInst(swift::AllocRefDynamicInst *i) {
 void IRGenSILFunction::visitDeallocStackInst(swift::DeallocStackInst *i) {
   auto allocatedType = i->getOperand()->getType();
   const TypeInfo &allocatedTI = getTypeInfo(allocatedType);
-  Address container = getLoweredContainerOfAddress(i->getOperand());
+  StackAddress stackAddr = getLoweredStackAddress(i->getOperand());
 
-  // If the type isn't fixed-size, check whether we added an emission note.
-  // If so, we should deallocate and destroy at the same time.
-  if (!isa<FixedTypeInfo>(allocatedTI) && claimEmissionNote(i)) {
-    allocatedTI.destroyStack(*this, container, allocatedType);
-  } else {
-    allocatedTI.deallocateStack(*this, container, allocatedType);
-  }
+  allocatedTI.deallocateStack(*this, stackAddr, allocatedType);
 }
 
 void IRGenSILFunction::visitDeallocRefInst(swift::DeallocRefInst *i) {
@@ -4600,48 +4601,10 @@ void IRGenSILFunction::visitCopyAddrInst(swift::CopyAddrInst *i) {
 // does not produce any values.
 void IRGenSILFunction::visitBindMemoryInst(swift::BindMemoryInst *) {}
 
-static DeallocStackInst *
-findPairedDeallocStackForDestroyAddr(DestroyAddrInst *destroyAddr) {
-  // This peephole only applies if the address being destroyed is the
-  // result of an alloc_stack.
-  auto allocStack = dyn_cast<AllocStackInst>(destroyAddr->getOperand());
-  if (!allocStack) return nullptr;
-
-  for (auto inst = &*std::next(destroyAddr->getIterator());
-       !isa<TermInst>(inst);
-       inst = &*std::next(inst->getIterator())) {
-    // If we find a dealloc_stack of the right memory, great.
-    if (auto deallocStack = dyn_cast<DeallocStackInst>(inst))
-      if (deallocStack->getOperand() == allocStack)
-        return deallocStack;
-
-    // Otherwise, if the instruction uses the alloc_stack result, treat it
-    // as interfering.  This assumes that any re-initialization of
-    // the alloc_stack will be obvious in the function.
-    for (auto &operand : inst->getAllOperands())
-      if (operand.get() == allocStack)
-        return nullptr;
-  }
-
-  // If we ran into the terminator, stop; only apply this peephole locally.
-  // TODO: this could use a fancier dominance analysis, maybe.
-  return nullptr;
-}
-
 void IRGenSILFunction::visitDestroyAddrInst(swift::DestroyAddrInst *i) {
   SILType addrTy = i->getOperand()->getType();
   const TypeInfo &addrTI = getTypeInfo(addrTy);
 
-  // Try to fold a destroy_addr of a dynamic alloc_stack into a single
-  // destroyBuffer operation.
-  if (!isa<FixedTypeInfo>(addrTI)) {
-    // If we can find a matching dealloc stack, just set an emission note
-    // on it; that will cause it to destroy the current value.
-    if (auto deallocStack = findPairedDeallocStackForDestroyAddr(i)) {
-      addEmissionNote(deallocStack);
-      return;
-    }
-  }
-
-  // Otherwise, do the normal thing.
   Address base = getLoweredAddress(i->getOperand());
   addrTI.destroy(*this, base, addrTy);
 }
 
diff --git a/lib/IRGen/NonFixedTypeInfo.h b/lib/IRGen/NonFixedTypeInfo.h
index c86a30c943e2d..921ddd5d172b0 100644
--- a/lib/IRGen/NonFixedTypeInfo.h
+++ b/lib/IRGen/NonFixedTypeInfo.h
@@ -23,6 +23,7 @@
 #ifndef SWIFT_IRGEN_NONFIXEDTYPEINFO_H
 #define SWIFT_IRGEN_NONFIXEDTYPEINFO_H
 
+#include "Address.h"
 #include "GenOpaque.h"
 #include "IndirectTypeInfo.h"
 
@@ -57,28 +58,30 @@ class WitnessSizedTypeInfo : public IndirectTypeInfo {
   // This is useful for metaprogramming.
   static bool isFixed() { return false; }
 
-  ContainedAddress allocateStack(IRGenFunction &IGF,
-                                 SILType T,
-                                 const llvm::Twine &name) const override {
-    // Make a fixed-size buffer.
-    Address buffer = IGF.createFixedSizeBufferAlloca(name);
-    IGF.Builder.CreateLifetimeStart(buffer, getFixedBufferSize(IGF.IGM));
-
-    // Allocate an object of the appropriate type within it.
-    llvm::Value *address = emitAllocateBufferCall(IGF, T, buffer);
-    return { buffer, getAsBitCastAddress(IGF, address) };
+  StackAddress allocateStack(IRGenFunction &IGF,
+                             SILType T,
+                             bool isInEntryBlock,
+                             const llvm::Twine &name) const override {
+    // Allocate memory on the stack.
+    auto alloca = emitDynamicAlloca(IGF, T, isInEntryBlock);
+    assert(((isInEntryBlock && alloca.SavedSP == nullptr) ||
+            (!isInEntryBlock && alloca.SavedSP != nullptr)) &&
+           "stacksave/restore operations can only be skipped in the entry "
+           "block");
+    IGF.Builder.CreateLifetimeStart(alloca.Alloca);
+    return { getAsBitCastAddress(IGF, alloca.Alloca), alloca.SavedSP };
   }
 
-  void deallocateStack(IRGenFunction &IGF, Address buffer,
+  void deallocateStack(IRGenFunction &IGF, StackAddress stackAddress,
                        SILType T) const override {
-    emitDeallocateBufferCall(IGF, T, buffer);
-    IGF.Builder.CreateLifetimeEnd(buffer, getFixedBufferSize(IGF.IGM));
+    IGF.Builder.CreateLifetimeEnd(stackAddress.getAddress().getAddress());
+    emitDeallocateDynamicAlloca(IGF, stackAddress);
   }
 
-  void destroyStack(IRGenFunction &IGF, Address buffer,
+  void destroyStack(IRGenFunction &IGF, StackAddress stackAddress,
                     SILType T) const override {
-    emitDestroyBufferCall(IGF, T, buffer);
-    IGF.Builder.CreateLifetimeEnd(buffer, getFixedBufferSize(IGF.IGM));
+    emitDestroyCall(IGF, T, stackAddress.getAddress());
+    deallocateStack(IGF, stackAddress, T);
   }
 
   llvm::Value *getValueWitnessTable(IRGenFunction &IGF, SILType T) const {
diff --git a/lib/IRGen/TypeInfo.h b/lib/IRGen/TypeInfo.h
index b498cfa186c36..f23a8f9c3225b 100644
--- a/lib/IRGen/TypeInfo.h
+++ b/lib/IRGen/TypeInfo.h
@@ -39,7 +39,7 @@ namespace swift {
 namespace irgen {
   class Address;
-  class ContainedAddress;
+  class StackAddress;
   class IRGenFunction;
   class IRGenModule;
   class Explosion;
@@ -257,17 +257,17 @@ class TypeInfo {
   ExplosionSchema getSchema() const;
 
   /// Allocate a variable of this type on the stack.
-  virtual ContainedAddress allocateStack(IRGenFunction &IGF,
-                                         SILType T,
-                                         const llvm::Twine &name) const = 0;
+  virtual StackAddress allocateStack(IRGenFunction &IGF, SILType T,
+                                     bool isInEntryBlock,
+                                     const llvm::Twine &name) const = 0;
 
   /// Deallocate a variable of this type.
-  virtual void deallocateStack(IRGenFunction &IGF, Address addr,
+  virtual void deallocateStack(IRGenFunction &IGF, StackAddress addr,
                                SILType T) const = 0;
 
   /// Destroy the value of a variable of this type, then deallocate its
   /// memory.
-  virtual void destroyStack(IRGenFunction &IGF, Address addr,
+  virtual void destroyStack(IRGenFunction &IGF, StackAddress addr,
                             SILType T) const = 0;
 
   /// Copy or take a value out of one address and into another, destroying
diff --git a/lib/SILOptimizer/Analysis/AliasAnalysis.cpp b/lib/SILOptimizer/Analysis/AliasAnalysis.cpp
index d331089d514a9..48baaae99b1b4 100644
--- a/lib/SILOptimizer/Analysis/AliasAnalysis.cpp
+++ b/lib/SILOptimizer/Analysis/AliasAnalysis.cpp
@@ -55,7 +55,7 @@ enum class AAKind : unsigned {
 } // end anonymous namespace
 
 static llvm::cl::opt<AAKind>
-DebugAAKinds("aa", llvm::cl::desc("Alias Analysis Kinds:"),
+DebugAAKinds("aa-kind", llvm::cl::desc("Alias Analysis Kinds:"),
              llvm::cl::init(AAKind::All),
              llvm::cl::values(clEnumValN(AAKind::None,
                                          "none",
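With the cl::opt renamed, the debugging flag is now spelled -aa-kind; an illustrative invocation (assuming the usual LLVM option handling in SIL tools):

    sil-opt -aa-kind=none input.sil    # previously: -aa=none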
diff --git a/lib/SILOptimizer/PassManager/PassManager.cpp b/lib/SILOptimizer/PassManager/PassManager.cpp
index 420fbdbb2b283..dfc5b0b31f3f1 100644
--- a/lib/SILOptimizer/PassManager/PassManager.cpp
+++ b/lib/SILOptimizer/PassManager/PassManager.cpp
@@ -241,6 +241,12 @@ SILPassManager::SILPassManager(SILModule *M, llvm::StringRef Stage) :
   }
 }
 
+SILPassManager::SILPassManager(SILModule *M, irgen::IRGenModule *IRMod,
+                               llvm::StringRef Stage)
+    : SILPassManager(M, Stage) {
+  this->IRMod = IRMod;
+}
+
 bool SILPassManager::continueTransforming() {
   return Mod->getStage() == SILStage::Raw ||
          NumPassesRun < SILNumOptPassesToRun;
@@ -513,6 +519,9 @@ void SILPassManager::runOneIteration() {
 
 /// D'tor.
 SILPassManager::~SILPassManager() {
+  assert(IRGenPasses.empty() && "Must have added all registered IRGen SIL "
+                                "passes to the list of transformations");
+
   // Free all transformations.
   for (auto *T : Transformations)
     delete T;
@@ -607,6 +616,15 @@ void SILPassManager::addPass(PassKind Kind) {
     Transformations.push_back(T);                                              \
     break;                                                                     \
   }
+#define IRGEN_PASS(ID, NAME, DESCRIPTION)                                      \
+  case PassKind::ID: {                                                         \
+    SILTransform *T = IRGenPasses[unsigned(Kind)];                             \
+    assert(T && "Missing IRGen pass?");                                        \
+    T->setPassKind(PassKind::ID);                                              \
+    Transformations.push_back(T);                                              \
+    IRGenPasses.erase(unsigned(Kind));                                         \
+    break;                                                                     \
+  }
 #include "swift/SILOptimizer/PassManager/Passes.def"
   case PassKind::invalidPassKind:
     llvm_unreachable("invalid pass kind");
diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp
index d0fa3aba744d5..99c76f6093f08 100644
--- a/lib/SILOptimizer/PassManager/PassPipeline.cpp
+++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp
@@ -401,6 +401,21 @@ static void addSILDebugInfoGeneratorPipeline(SILPassPipelinePlan &P) {
   P.addSILDebugInfoGenerator();
 }
 
+/// Non-mandatory passes that should run as preparation for IRGen.
+static void addIRGenPreparePipeline(SILPassPipelinePlan &P) {
+  P.startPipeline("IRGen Preparation");
+  // Insert SIL passes to run during IRGen.
+  // Hoist generic alloc_stack instructions to the entry block to enable better
+  // llvm-ir generation for dynamic alloca instructions.
+  P.addAllocStackHoisting();
+}
+
+SILPassPipelinePlan SILPassPipelinePlan::getIRGenPreparePassPipeline() {
+  SILPassPipelinePlan P;
+  addIRGenPreparePipeline(P);
+  return P;
+}
+
 SILPassPipelinePlan
 SILPassPipelinePlan::getPerformancePassPipeline(SILOptions Options) {
   SILPassPipelinePlan P;
diff --git a/test/IRGen/enum_resilience.swift b/test/IRGen/enum_resilience.swift
index 7a9e0b36b920f..5b44a75c21681 100644
--- a/test/IRGen/enum_resilience.swift
+++ b/test/IRGen/enum_resilience.swift
@@ -139,22 +139,28 @@ public func constructResilientEnumPayload(_ s: Size) -> Medium {
 }
 
 // CHECK-LABEL: define{{( protected)?}} {{i32|i64}} @_TF15enum_resilience19resilientSwitchTestFO14resilient_enum6MediumSi(%swift.opaque* noalias nocapture)
-// CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[(12|24) x i8\]]]
-
 // CHECK: [[METADATA:%.*]] = call %swift.type* @_TMaO14resilient_enum6Medium()
 // CHECK: [[METADATA_ADDR:%.*]] = bitcast %swift.type* [[METADATA]] to i8***
 // CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[METADATA_ADDR]], [[INT]] -1
 // CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]]
-// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 5
+// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17
+// CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]]
+// CHECK: [[WITNESS_FOR_SIZE:%.*]] = ptrtoint i8* [[WITNESS]]
+// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16
+// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16
+// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16
+// CHECK: [[ENUM_STORAGE:%.*]] = bitcast i8* [[ALLOCA]] to %swift.opaque*
+
+// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6
 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]]
 // CHECK: [[WITNESS_FN:%.*]] = bitcast i8* [[WITNESS]]
-// CHECK: [[ENUM_COPY:%.*]] = call %swift.opaque* [[WITNESS_FN]]([[BUFFER_TYPE]]* [[BUFFER]], %swift.opaque* %0, %swift.type* [[METADATA]])
+// CHECK: [[ENUM_COPY:%.*]] = call %swift.opaque* [[WITNESS_FN]](%swift.opaque* [[ENUM_STORAGE]], %swift.opaque* %0, %swift.type* [[METADATA]])
 
 // CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 23
 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]]
 // CHECK: [[WITNESS_FN:%.*]] = bitcast i8* [[WITNESS]]
-// CHECK: [[TAG:%.*]] = call i32 %getEnumTag(%swift.opaque* [[ENUM_COPY]], %swift.type* [[METADATA]])
+// CHECK: [[TAG:%.*]] = call i32 %getEnumTag(%swift.opaque* [[ENUM_STORAGE]], %swift.type* [[METADATA]])
 
 // CHECK: switch i32 [[TAG]], label %[[DEFAULT_CASE:.*]] [
 // CHECK:   i32 -1, label %[[PAMPHLET_CASE:.*]]
%initializeBufferWithCopy([[BUFFER_TYPE]]* [[BUFFER]], %swift.opaque* %0, %swift.type* %T) - copy_addr %x to [initialization] %a : $*T - // CHECK: call void @consume(%swift.opaque* noalias nocapture [[ADDR]], %swift.type* %T) - %u = function_ref @consume : $@convention(thin) (@in T) -> () - %z = apply %u(%a) : $@convention(thin) (@in T) -> () - // CHECK: [[BUFFERLIFE:%.*]] = bitcast [[BUFFER_TYPE]]* [[BUFFER]] - // CHECK: llvm.lifetime.end(i64 [[BUFFER_SIZE]], i8* [[BUFFERLIFE]]) - dealloc_stack %a : $*T - return undef : $() -} - protocol P {} // CHECK-LABEL: define{{( protected)?}} void @join_init_existential_copy_addr(%P27fixed_size_buffer_peepholes1P_* noalias nocapture sret, %swift.opaque* noalias nocapture, %swift.type* %T, i8** %T.P) // CHECK: [[BUFFER:%.*]] = getelementptr inbounds %P27fixed_size_buffer_peepholes1P_, %P27fixed_size_buffer_peepholes1P_* %0, i32 0, i32 0 -// CHECK: call %swift.opaque* %initializeBufferWithTake([[BUFFER_TYPE]]* [[BUFFER]], %swift.opaque* %1 +// CHECK: call %swift.opaque* %initializeBufferWithTake([[BUFFER_TYPE:\[.* x i8\]]]* [[BUFFER]], %swift.opaque* %1 sil @join_init_existential_copy_addr : $@convention(thin) (@in T) -> @out P { entry(%p : $*P, %x: $*T): %y = init_existential_addr %p : $*P, $T copy_addr [take] %x to [initialization] %y : $*T return undef : $() } - -// CHECK-LABEL: define{{( protected)?}} void @dont_join_alloc_stack_copy_addr_if_intervening_use -sil @dont_join_alloc_stack_copy_addr_if_intervening_use : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %allocateBuffer([[BUFFER_TYPE]]* [[BUFFER]], %swift.type* %T) - %a = alloc_stack $T - %p = function_ref @produce : $@convention(thin) () -> @out T - %y = apply %p(%a) : $@convention(thin) () -> @out T - destroy_addr %a : $*T - - // CHECK: call %swift.opaque* %initializeWithCopy(%swift.opaque* [[ADDR]], - copy_addr %x to [initialization] %a : $*T - %u = function_ref @consume : $@convention(thin) (@in T) -> () - %z = apply %u(%a) : $@convention(thin) (@in T) -> () - dealloc_stack %a : $*T - return undef : $() -} - -// CHECK-LABEL: define{{( protected)?}} void @dont_join_alloc_stack_copy_addr_if_no_copy_addr -sil @dont_join_alloc_stack_copy_addr_if_no_copy_addr : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %allocateBuffer([[BUFFER_TYPE]]* [[BUFFER]], %swift.type* %T) - %a = alloc_stack $T - %p = function_ref @produce : $@convention(thin) () -> @out T - %y = apply %p(%a) : $@convention(thin) () -> @out T - destroy_addr %a : $*T - dealloc_stack %a : $*T - return undef : $() -} - -// CHECK-LABEL: define{{( protected)?}} void @dont_join_alloc_stack_if_copy_addr_in_different_bb -sil @dont_join_alloc_stack_if_copy_addr_in_different_bb : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %allocateBuffer([[BUFFER_TYPE]]* [[BUFFER]], %swift.type* %T) - %a = alloc_stack $T - br next - -next: - // CHECK: call %swift.opaque* %initializeWithCopy(%swift.opaque* [[ADDR]], - copy_addr %x to [initialization] %a : $*T - // CHECK: call void @consume(%swift.opaque* noalias nocapture [[ADDR]], %swift.type* %T) - %u = function_ref @consume : $@convention(thin) (@in T) -> () - %z = apply %u(%a) : $@convention(thin) (@in T) -> () - dealloc_stack %a : $*T - return undef : 
$() -} - - diff --git a/test/IRGen/generic_casts.swift b/test/IRGen/generic_casts.swift index f3203d4c3b15e..2d36ff80a8389 100644 --- a/test/IRGen/generic_casts.swift +++ b/test/IRGen/generic_casts.swift @@ -34,11 +34,18 @@ import gizmo // CHECK: define hidden i64 @_TF13generic_casts8allToInt{{.*}}(%swift.opaque* noalias nocapture, %swift.type* %T) func allToInt(_ x: T) -> Int { return x as! Int - // CHECK: [[BUF:%.*]] = alloca [[BUFFER:.24 x i8.]], // CHECK: [[INT_TEMP:%.*]] = alloca %Si, - // CHECK: [[TEMP:%.*]] = call %swift.opaque* {{.*}}([[BUFFER]]* [[BUF]], %swift.opaque* %0, %swift.type* %T) + // CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** + // CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], i64 -1 + // CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] + // CHECK: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 + // CHECK: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] + // CHECK: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] + // CHECK: [[T_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 + // CHECK: [[T_TMP:%.*]] = bitcast i8* [[T_ALLOCA]] to %swift.opaque* + // CHECK: [[TEMP:%.*]] = call %swift.opaque* {{.*}}(%swift.opaque* [[T_TMP]], %swift.opaque* %0, %swift.type* %T) // CHECK: [[T0:%.*]] = bitcast %Si* [[INT_TEMP]] to %swift.opaque* - // CHECK: call i1 @swift_rt_swift_dynamicCast(%swift.opaque* [[T0]], %swift.opaque* [[TEMP]], %swift.type* %T, %swift.type* @_TMSi, i64 7) + // CHECK: call i1 @swift_rt_swift_dynamicCast(%swift.opaque* [[T0]], %swift.opaque* [[T_TMP]], %swift.type* %T, %swift.type* @_TMSi, i64 7) // CHECK: [[T0:%.*]] = getelementptr inbounds %Si, %Si* [[INT_TEMP]], i32 0, i32 0 // CHECK: [[INT_RESULT:%.*]] = load i64, i64* [[T0]], // CHECK: ret i64 [[INT_RESULT]] diff --git a/test/IRGen/generic_tuples.swift b/test/IRGen/generic_tuples.swift index 829ad7b958c59..f5d80fa91cc23 100644 --- a/test/IRGen/generic_tuples.swift +++ b/test/IRGen/generic_tuples.swift @@ -14,26 +14,25 @@ func dup(_ x: T) -> (T, T) { var x = x; return (x,x) } // CHECK: define hidden void @_TF14generic_tuples3dup{{.*}}(%swift.opaque* noalias nocapture, %swift.opaque* noalias nocapture, %swift.opaque* noalias nocapture, %swift.type* %T) // CHECK: entry: // Allocate a local variable for 'x'. 
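+// The local is now a dynamically sized alloca: its size is loaded from T's
+// value witness table instead of coming from a fixed-size value buffer.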
-// CHECK-NEXT: [[XBUF:%.*]] = alloca [[BUFFER:.*]], align 8 -// CHECK-NEXT: [[XBUFLIFE:%.*]] = bitcast {{.*}} [[XBUF]] -// CHECK-NEXT: call void @llvm.lifetime.start({{.*}} [[XBUFLIFE]]) -// CHECK-NEXT: [[T0:%.*]] = bitcast [[TYPE]]* %T to i8*** -// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i8**, i8*** [[T0]], i64 -1 -// CHECK-NEXT: [[T_VALUE:%.*]] = load i8**, i8*** [[T1]], align 8 -// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8*, i8** [[T_VALUE]] -// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[T0]], align 8 -// CHECK-NEXT: [[INITIALIZE_BUFFER_FN:%.*]] = bitcast i8* [[T1]] to [[OPAQUE]]* ([[BUFFER]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: [[X:%.*]] = call [[OPAQUE]]* [[INITIALIZE_BUFFER_FN]]([[BUFFER]]* [[XBUF]], [[OPAQUE]]* {{.*}}, [[TYPE]]* %T) +// CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** +// CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], i64 -1 +// CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] +// CHECK: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] +// CHECK: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] +// CHECK: [[X_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 +// CHECK: [[X_TMP:%.*]] = bitcast i8* [[X_ALLOCA]] to %swift.opaque* +// CHECK-NEXT: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 +// CHECK-NEXT: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]], align 8 +// CHECK-NEXT: [[INITIALIZE_WITH_COPY:%.*]] = bitcast i8* [[WITNESS]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: [[X:%.*]] = call [[OPAQUE]]* [[INITIALIZE_WITH_COPY]]([[OPAQUE]]* [[X_TMP]], [[OPAQUE]]* {{.*}}, [[TYPE]]* %T) // Copy 'x' into the first result. -// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8*, i8** [[T_VALUE]], i32 6 -// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[T0]], align 8 -// CHECK-NEXT: [[COPY_FN:%.*]] = bitcast i8* [[T1]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call [[OPAQUE]]* [[COPY_FN]]([[OPAQUE]]* %0, [[OPAQUE]]* [[X]], [[TYPE]]* %T) +// CHECK-NEXT: call [[OPAQUE]]* [[INITIALIZE_WITH_COPY]]([[OPAQUE]]* %0, [[OPAQUE]]* [[X_TMP]], [[TYPE]]* %T) // Copy 'x' into the second element. -// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8*, i8** [[T_VALUE]], i32 9 -// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[T0]], align 8 -// CHECK-NEXT: [[TAKE_FN:%.*]] = bitcast i8* [[T1]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* %1, [[OPAQUE]]* [[X]], [[TYPE]]* %T) +// CHECK-NEXT: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 9 +// CHECK-NEXT: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]], align 8 +// CHECK-NEXT: [[TAKE_FN:%.*]] = bitcast i8* [[WITNESS]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* %1, [[OPAQUE]]* [[X_TMP]], [[TYPE]]* %T) struct S {} diff --git a/test/IRGen/lifetime.sil b/test/IRGen/lifetime.sil index 9f44d9be3772f..bdf43eb468aa0 100644 --- a/test/IRGen/lifetime.sil +++ b/test/IRGen/lifetime.sil @@ -18,30 +18,30 @@ bb0(%x : $*T): return %0 : $() } // CHECK: define{{( protected)?}} void @generic([[OPAQUE]]* noalias nocapture, [[TYPE]]* %T) {{.*}} { -// The fixed-size buffer. 
-// CHECK: [[YBUF:%.*]] = alloca [[BUFFER:.*]], align -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start(i64 [[BUFFER_SIZE:12|24]], i8* [[YBUFLIFE]]) // Allocate it. -// CHECK-NEXT: [[T0:%.*]] = bitcast [[TYPE]]* %T to i8*** -// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i8**, i8*** [[T0]], {{i32|i64}} -1 -// CHECK-NEXT: [[VWTABLE:%.*]] = load i8**, i8*** [[T1]], align -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 5 -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align -// CHECK-NEXT: [[BUFFER_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[BUFFER]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** +// CHECK-NEXT: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], {{(i32|i64)}} -1 +// CHECK-NEXT: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] +// CHECK-NEXT: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK-NEXT: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] +// CHECK-NEXT: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] +// CHECK-NEXT: [[Y_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 +// CHECK-NEXT: call void @llvm.lifetime.start({{(i32|i64)}} -1, i8* [[Y_ALLOCA]]) +// CHECK-NEXT: [[Y_TMP:%.*]] = bitcast i8* [[Y_ALLOCA]] to %swift.opaque* // Copy 'x' into 'y'. -// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[BUFFER_COPY_FN]]([[BUFFER]]* [[YBUF]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) -// Destroy and deallocate 'y'. -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[VWTABLE]], align -// CHECK-NEXT: [[DESTROY_BUFFER_FN:%.*]] = bitcast i8* [[T4]] to void ([[BUFFER]]*, [[TYPE]]*)* -// CHECK-NEXT: call void [[DESTROY_BUFFER_FN]]([[BUFFER]]* [[YBUF]], [[TYPE]]* %T) -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end(i64 [[BUFFER_SIZE]], i8* [[YBUFLIFE]]) -// Destroy 'x'. -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 4 +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 +// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align +// CHECK-NEXT: [[INIT_WITH_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[INIT_WITH_COPY_FN]]([[OPAQUE]]* [[Y_TMP]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) +// Destroy 'y'. +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 4 // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: [[DESTROY_FN:%.*]] = bitcast i8* [[T4]] to void ([[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) +// Destroy 'x'. // CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[X]], [[TYPE]]* %T) +// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[OPAQUE]]* [[Y_TMP]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} -1, i8* [[YBUFLIFE]]) // Return. // CHECK-NEXT: ret void @@ -57,35 +57,35 @@ bb0(%x : $*T): return %0 : $() } // CHECK: define{{( protected)?}} void @generic_with_reuse([[OPAQUE]]* noalias nocapture, [[TYPE]]* %T) {{.*}} { -// The fixed-size buffer. -// CHECK: [[YBUF:%.*]] = alloca [[BUFFER:.*]], align -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start(i64 [[BUFFER_SIZE:12|24]], i8* [[YBUFLIFE]]) // Allocate it. 
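+// (The -1 size passed to the lifetime intrinsics below reflects that the
+// alloca's size is only known at run time.)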
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[TYPE]]* %T to i8*** -// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i8**, i8*** [[T0]], {{i32|i64}} -1 -// CHECK-NEXT: [[VWTABLE:%.*]] = load i8**, i8*** [[T1]], align -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 5 -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align -// CHECK-NEXT: [[BUFFER_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[BUFFER]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** +// CHECK-NEXT: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], {{(i32|i64)}} -1 +// CHECK-NEXT: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] +// CHECK-NEXT: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK-NEXT: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] +// CHECK-NEXT: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] +// CHECK-NEXT: [[Y_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 +// CHECK-NEXT: call void @llvm.lifetime.start({{(i32|i64)}} -1, i8* [[Y_ALLOCA]]) +// CHECK-NEXT: [[Y_TMP:%.*]] = bitcast i8* [[Y_ALLOCA]] to %swift.opaque* // Copy 'x' into 'y'. -// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[BUFFER_COPY_FN]]([[BUFFER]]* [[YBUF]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 +// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align +// CHECK-NEXT: [[INIT_WITH_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[INIT_WITH_COPY_FN]]([[OPAQUE]]* [[Y_TMP]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) // Destroy 'y'. -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 4 +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 4 // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: [[DESTROY_FN:%.*]] = bitcast i8* [[T4]] to void ([[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y]], [[TYPE]]* %T) +// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) // Copy 'x' into 'y' again, this time as a take. -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 9 +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 9 // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: [[TAKE_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* [[Y]], [[OPAQUE]]* [[X]], [[TYPE]]* %T) -// Destroy and deallocate 'y'. -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[VWTABLE]], align -// CHECK-NEXT: [[DESTROY_BUFFER_FN:%.*]] = bitcast i8* [[T4]] to void ([[BUFFER]]*, [[TYPE]]*)* -// CHECK-NEXT: call void [[DESTROY_BUFFER_FN]]([[BUFFER]]* [[YBUF]], [[TYPE]]* %T) -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end(i64 [[BUFFER_SIZE]], i8* [[YBUFLIFE]]) +// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* [[Y_TMP]], [[OPAQUE]]* [[X]], [[TYPE]]* %T) +// Destroy 'y'. +// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) +// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[OPAQUE]]* [[Y_TMP]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} -1, i8* [[YBUFLIFE]]) // Return. 
// CHECK-NEXT: ret void @@ -102,9 +102,9 @@ bb0(%x : $*Builtin.Int64): // CHECK-LABEL: define{{( protected)?}} void @fixed_size(i64* noalias nocapture dereferenceable(8)) // CHECK: [[XBUF:%.*]] = alloca i64 // CHECK-NEXT: [[XBUFLIFE:%.*]] = bitcast i64* [[XBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* [[XBUFLIFE]]) +// CHECK-NEXT: call void @llvm.lifetime.start({{(i32|i64)}} 8, i8* [[XBUFLIFE]]) // CHECK-NEXT: load // CHECK-NEXT: store // CHECK-NEXT: [[XBUFLIFE:%.*]] = bitcast i64* [[XBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* [[XBUFLIFE]]) +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} 8, i8* [[XBUFLIFE]]) diff --git a/test/IRGen/struct_resilience.swift b/test/IRGen/struct_resilience.swift index 9ef001e95236c..81bab4a877bec 100644 --- a/test/IRGen/struct_resilience.swift +++ b/test/IRGen/struct_resilience.swift @@ -15,25 +15,24 @@ import resilient_enum public func functionWithResilientTypes(_ s: Size, f: (Size) -> Size) -> Size { -// CHECK: [[RESULT:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[METADATA:%.*]] = call %swift.type* @_TMaV16resilient_struct4Size() // CHECK: [[METADATA_ADDR:%.*]] = bitcast %swift.type* [[METADATA]] to i8*** // CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[METADATA_ADDR]], [[INT]] -1 // CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] -// CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 5 +// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]] +// CHECK: [[WITNESS_FOR_SIZE:%.*]] = ptrtoint i8* [[WITNESS]] +// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16 +// CHECK: [[STRUCT_ADDR:%.*]] = bitcast i8* [[ALLOCA]] to %swift.opaque* + +// CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_PTR]] -// CHECK: [[initializeBufferWithCopy:%.*]] = bitcast i8* [[WITNESS]] -// CHECK: [[BUFFER:%.*]] = call %swift.opaque* [[initializeBufferWithCopy]]([[BUFFER_TYPE]]* [[RESULT]], %swift.opaque* %1, %swift.type* [[METADATA]]) +// CHECK: [[initializeWithCopy:%.*]] = bitcast i8* [[WITNESS]] +// CHECK: [[STRUCT_LOC:%.*]] = call %swift.opaque* [[initializeWithCopy]](%swift.opaque* [[STRUCT_ADDR]], %swift.opaque* %1, %swift.type* [[METADATA]]) // CHECK: [[FN:%.*]] = bitcast i8* %2 to void (%swift.opaque*, %swift.opaque*, %swift.refcounted*)* -// CHECK: call void [[FN]](%swift.opaque* noalias nocapture sret %0, %swift.opaque* noalias nocapture [[BUFFER]], %swift.refcounted* %3) - -// CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 3 -// CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_PTR]] -// CHECK: [[deallocateBuffer:%.*]] = bitcast i8* [[WITNESS]] -// CHECK: call void [[deallocateBuffer]]([[BUFFER_TYPE]]* [[RESULT]], %swift.type* [[METADATA]]) +// CHECK: call void [[FN]](%swift.opaque* noalias nocapture sret %0, %swift.opaque* noalias nocapture [[STRUCT_ADDR]], %swift.refcounted* %3) // CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 4 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_PTR]] diff --git a/test/Runtime/linux-fatal-backtrace.swift b/test/Runtime/linux-fatal-backtrace.swift index c569d7c82fe73..03eaced45ffb2 100644 --- a/test/Runtime/linux-fatal-backtrace.swift +++ b/test/Runtime/linux-fatal-backtrace.swift @@ -2,7 +2,6 @@ // RUN: mkdir -p %t // RUN: %target-build-swift %s -o %t/a.out // RUN: not --crash 
%t/a.out 2>&1 | PYTHONPATH=%lldb-python-path %utils/symbolicate-linux-fatal %t/a.out - | %utils/backtrace-check -u
-
 // REQUIRES: executable_test
 // REQUIRES: OS=linux-gnu
 // REQUIRES: lldb
diff --git a/test/SILOptimizer/allocstack_hoisting.sil b/test/SILOptimizer/allocstack_hoisting.sil
new file mode 100644
index 0000000000000..1cc8cbbfebbd8
--- /dev/null
+++ b/test/SILOptimizer/allocstack_hoisting.sil
@@ -0,0 +1,207 @@
+// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -alloc-stack-hoisting | %FileCheck %s
+// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -alloc-stack-hoisting -sil-merge-stack-slots=false | %FileCheck %s
+sil_stage canonical
+
+import Builtin
+import Swift
+import SwiftShims
+
+protocol P {
+}
+
+struct Generic<T> {
+  var x : T
+}
+
+struct FixedSize {
+  var x : Builtin.Int8
+}
+
+// CHECK-LABEL: sil @hoist_generic
+// CHECK: bb0({{.*}}):
+// CHECK: [[AS:%.*]] = alloc_stack $T
+// CHECK: bb1:
+// CHECK-NOT: alloc_stack
+// CHECK-NOT: dealloc_stack
+// CHECK: bb2
+// CHECK: bb3:
+// CHECK: dealloc_stack [[AS]]
+// CHECK: return
+
+sil @hoist_generic : $@convention(thin) <T> (@in T, Builtin.Int1) -> () {
+bb0(%0 : $*T, %1 : $Builtin.Int1):
+  cond_br %1, bb1, bb2
+bb1:
+  %2 = alloc_stack $T
+  copy_addr [take] %0 to [initialization] %2 : $*T
+  destroy_addr %2 : $*T
+  dealloc_stack %2 : $*T
+  br bb3
+bb2:
+  destroy_addr %0 : $*T
+  br bb3
+bb3:
+  %3 = tuple ()
+  return %3 : $()
+}
+
+sil @throwing_fun : $@convention(thin) () -> @error Error
+
+// CHECK-LABEL: sil @hoist_generic_throwing
+// CHECK: bb0({{.*}}):
+// CHECK: [[AS:%.*]] = alloc_stack $T
+// CHECK: bb1:
+// CHECK-NOT: alloc_stack
+// CHECK-NOT: dealloc_stack
+// CHECK: bb2
+// CHECK: bb3:
+// CHECK: try_apply
+// CHECK: bb4({{.*}}):
+// CHECK: dealloc_stack [[AS]]
+// CHECK: return
+// CHECK: bb5({{.*}}):
+// CHECK: dealloc_stack [[AS]]
+// CHECK: throw
+
+sil @hoist_generic_throwing : $@convention(thin) <T> (@in T, Builtin.Int1) -> @error Error {
+bb0(%0 : $*T, %1 : $Builtin.Int1):
+  cond_br %1, bb1, bb2
+bb1:
+  %2 = alloc_stack $T
+  copy_addr [take] %0 to [initialization] %2 : $*T
+  destroy_addr %2 : $*T
+  dealloc_stack %2 : $*T
+  br bb3
+bb2:
+  destroy_addr %0 : $*T
+  br bb3
+bb3:
+  %3 = function_ref @throwing_fun : $@convention(thin) () -> @error Error
+  try_apply %3() : $@convention(thin) () -> @error Error, normal bb4, error bb5
+
+bb4(%6 : $()):
+  %4 = tuple ()
+  return %4 : $()
+
+bb5(%5 : $Error):
+  throw %5 : $Error
+}
+
+// CHECK-LABEL: sil @hoist_generic_type
+// CHECK: bb0({{.*}}):
+// CHECK: [[AS:%.*]] = alloc_stack $Generic<T>
+// CHECK: bb1:
+// CHECK-NOT: alloc_stack
+// CHECK-NOT: dealloc_stack
+// CHECK: bb2
+// CHECK: bb3:
+// CHECK: dealloc_stack [[AS]]
+// CHECK: return
+
+sil @hoist_generic_type : $@convention(thin) <T> (@in Generic<T>, Builtin.Int1) -> () {
+bb0(%0 : $*Generic<T>, %1 : $Builtin.Int1):
+  cond_br %1, bb1, bb2
+bb1:
+  %2 = alloc_stack $Generic<T>
+  copy_addr [take] %0 to [initialization] %2 : $*Generic<T>
+  destroy_addr %2 : $*Generic<T>
+  dealloc_stack %2 : $*Generic<T>
+  br bb3
+bb2:
+  destroy_addr %0 : $*Generic<T>
+  br bb3
+bb3:
+  %3 = tuple ()
+  return %3 : $()
+}
+
+// CHECK-LABEL: sil @hoist_generic_nesting
+// CHECK: bb0({{.*}}):
+// CHECK: [[AS:%.*]] = alloc_stack $T
+// CHECK: [[AS2:%.*]] = alloc_stack $T
+// CHECK: [[FIXED:%.*]] = alloc_stack $FixedSize
+// CHECK: bb1:
+// CHECK-NOT: alloc_stack
+// CHECK-NOT: dealloc_stack
+// CHECK: bb2
+// CHECK: bb3:
+// CHECK: dealloc_stack [[FIXED]]
+// CHECK: dealloc_stack [[AS2]]
+// CHECK: dealloc_stack [[AS]]
+// CHECK: return
+
+sil @hoist_generic_nesting : $@convention(thin) <T> (@in T, Builtin.Int1) -> () {
+bb0(%0 : $*T, %1 : $Builtin.Int1):
+  %2 = alloc_stack $FixedSize
+  cond_br %1, bb1, bb2
+bb1:
+  %3 = alloc_stack $T
+  %4 = alloc_stack $T
+  copy_addr [take] %0 to [initialization] %3 : $*T
+  destroy_addr %3 : $*T
+  dealloc_stack %4 : $*T
+  dealloc_stack %3 : $*T
+  br bb3
+bb2:
+  destroy_addr %0 : $*T
+  br bb3
+bb3:
+  dealloc_stack %2 : $*FixedSize
+  %5 = tuple ()
+  return %5 : $()
+}
+
+// CHECK-LABEL: sil @dont_hoist_opened_generic
+// CHECK: bb0({{.*}}):
+// CHECK-NOT: alloc_stack
+// CHECK: bb1:
+// CHECK: alloc_stack
+// CHECK: bb2:
+// CHECK: bb3:
+// CHECK-NOT: dealloc_stack
+// CHECK: return
+
+sil @dont_hoist_opened_generic : $@convention(thin) (@in P, Builtin.Int1) -> () {
+bb0(%0 : $*P, %1 : $Builtin.Int1):
+  cond_br %1, bb1, bb2
+bb1:
+  %2 = open_existential_addr %0 : $*P to $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P
+  %3 = alloc_stack $@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P
+  copy_addr [take] %2 to [initialization] %3 : $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P
+  destroy_addr %3 : $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P
+  dealloc_stack %3 : $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P
+  br bb3
+bb2:
+  destroy_addr %0 : $*P
+  br bb3
+bb3:
+  %4 = tuple ()
+  return %4 : $()
+}
+
+// CHECK-LABEL: sil @dont_hoist_protocol
+// CHECK: bb0({{.*}}):
+// CHECK-NOT: alloc_stack
+// CHECK: bb1:
+// CHECK: alloc_stack
+// CHECK: bb2:
+// CHECK: bb3:
+// CHECK-NOT: dealloc_stack
+// CHECK: return
+
+sil @dont_hoist_protocol : $@convention(thin) (@in P, Builtin.Int1) -> () {
+bb0(%0 : $*P, %1 : $Builtin.Int1):
+  cond_br %1, bb1, bb2
+bb1:
+  %2 = alloc_stack $P
+  copy_addr [take] %0 to [initialization] %2 : $*P
+  destroy_addr %2 : $*P
+  dealloc_stack %2 : $*P
+  br bb3
+bb2:
+  destroy_addr %0 : $*P
+  br bb3
+bb3:
+  %3 = tuple ()
+  return %3 : $()
+}
diff --git a/test/SILOptimizer/basic-aa.sil b/test/SILOptimizer/basic-aa.sil
index d921a5edd74b3..679a3bdbb3158 100644
--- a/test/SILOptimizer/basic-aa.sil
+++ b/test/SILOptimizer/basic-aa.sil
@@ -1,4 +1,4 @@
-// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -module-name Swift %s -aa=basic-aa -aa-dump -o /dev/null | %FileCheck %s
+// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -module-name Swift %s -aa-kind=basic-aa -aa-dump -o /dev/null | %FileCheck %s

 // REQUIRES: asserts
diff --git a/test/SILOptimizer/mem-behavior.sil b/test/SILOptimizer/mem-behavior.sil
index 79e357c7e9d03..469e3133c9aa0 100644
--- a/test/SILOptimizer/mem-behavior.sil
+++ b/test/SILOptimizer/mem-behavior.sil
@@ -1,4 +1,4 @@
-// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa=basic-aa -mem-behavior-dump -o /dev/null | %FileCheck %s
+// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa-kind=basic-aa -mem-behavior-dump -o /dev/null | %FileCheck %s

 // REQUIRES: asserts
diff --git a/test/SILOptimizer/typed-access-tb-aa.sil b/test/SILOptimizer/typed-access-tb-aa.sil
index 6b223c4d9d565..90d7f5b01433f 100644
--- a/test/SILOptimizer/typed-access-tb-aa.sil
+++ b/test/SILOptimizer/typed-access-tb-aa.sil
@@ -1,4 +1,4 @@
-// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa=typed-access-tb-aa -aa-dump -o /dev/null | %FileCheck %s
+// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa-kind=typed-access-tb-aa -aa-dump -o /dev/null | %FileCheck %s

 // REQUIRES: asserts
diff --git a/tools/SourceKit/lib/SwiftLang/CMakeLists.txt b/tools/SourceKit/lib/SwiftLang/CMakeLists.txt
index d62ed29f61d37..799d396d5ab7c 100644
--- a/tools/SourceKit/lib/SwiftLang/CMakeLists.txt
+++ b/tools/SourceKit/lib/SwiftLang/CMakeLists.txt
@@ -10,7 +10,7 @@ add_sourcekit_library(SourceKitSwiftLang
     SwiftSourceDocInfo.cpp
   DEPENDS SourceKitCore swiftDriver swiftFrontend swiftClangImporter
     swiftIndex swiftIDE swiftAST swiftMarkup swiftParse swiftSIL swiftSILGen
-    swiftSILOptimizer swiftSema swiftBasic swiftSerialization
+    swiftSILOptimizer swiftIRGen swiftSema swiftBasic swiftSerialization
     swiftOption cmark
     # Clang dependencies.
     clangIndex
diff --git a/tools/sil-opt/CMakeLists.txt b/tools/sil-opt/CMakeLists.txt
index f2ccbb8fbf0b8..45f2cf68853c8 100644
--- a/tools/sil-opt/CMakeLists.txt
+++ b/tools/sil-opt/CMakeLists.txt
@@ -5,6 +5,9 @@ add_swift_host_tool(sil-opt
   swiftIRGen
   swiftSILGen
   swiftSILOptimizer
+  # Clang libraries included to appease the linker on Linux.
+  clangBasic
+  clangCodeGen
   LLVM_COMPONENT_DEPENDS
   DebugInfoCodeView
   SWIFT_COMPONENT tools
diff --git a/tools/sil-opt/SILOpt.cpp b/tools/sil-opt/SILOpt.cpp
index 45e2fc14588db..1c5343c517c13 100644
--- a/tools/sil-opt/SILOpt.cpp
+++ b/tools/sil-opt/SILOpt.cpp
@@ -20,6 +20,7 @@
 #include "swift/AST/DiagnosticsFrontend.h"
 #include "swift/AST/SILOptions.h"
 #include "swift/Basic/LLVMInitialize.h"
+#include "swift/Basic/LLVMContext.h"
 #include "swift/Frontend/DiagnosticVerifier.h"
 #include "swift/Frontend/Frontend.h"
 #include "swift/Frontend/PrintingDiagnosticConsumer.h"
@@ -29,6 +30,8 @@
 #include "swift/Serialization/SerializedModuleLoader.h"
 #include "swift/Serialization/SerializedSILLoader.h"
 #include "swift/Serialization/SerializationOptions.h"
+#include "swift/IRGen/IRGenPublic.h"
+#include "swift/IRGen/IRGenSILPasses.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -169,10 +172,20 @@ AssumeUnqualifiedOwnershipWhenParsing(
    "assume-parsing-unqualified-ownership-sil", llvm::cl::Hidden,
    llvm::cl::init(false),
    llvm::cl::desc("Assume all parsed functions have unqualified ownership"));

-static void runCommandLineSelectedPasses(SILModule *Module) {
-  SILPassManager PM(Module);
+static void runCommandLineSelectedPasses(SILModule *Module,
+                                         irgen::IRGenModule *IRGenMod) {
+  SILPassManager PM(Module, IRGenMod);
+  for (auto P : Passes) {
+#define PASS(ID, Name, Description)
+#define IRGEN_PASS(ID, Name, Description)                                     \
+  if (P == PassKind::ID)                                                      \
+    PM.registerIRGenPass(swift::PassKind::ID, irgen::create##ID());
+#include "swift/SILOptimizer/PassManager/Passes.def"
+  }
+
   PM.executePassPipelinePlan(
       SILPassPipelinePlan::getPassPipelineForKinds(Passes));
+
   if (Module->getOptions().VerifyAll)
     Module->verify();
 }
@@ -318,7 +331,12 @@ int main(int argc, char **argv) {
   } else if (OptimizationGroup == OptGroup::Performance) {
     runSILOptimizationPasses(*CI.getSILModule());
   } else {
-    runCommandLineSelectedPasses(CI.getSILModule());
+    auto *SILMod = CI.getSILModule();
+    {
+      auto T = irgen::createIRGenModule(SILMod, getGlobalLLVMContext());
+      runCommandLineSelectedPasses(SILMod, T.second);
+      irgen::deleteIRGenModule(T);
+    }
   }

   if (EmitSIB) {
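
Taken together, the sil-opt changes above illustrate the intended usage pattern for the new IRGen pass machinery: create an IRGenModule for the SILModule, hand it to a SILPassManager, register each IRGen pass by kind, and execute a pipeline that names that pass. A minimal consolidated sketch of that flow, using only the APIs introduced in this patch (the helper name runIRGenPreparePasses and the PassPipeline.h include path are illustrative, not part of the patch):

    #include "swift/Basic/LLVMContext.h"
    #include "swift/IRGen/IRGenPublic.h"
    #include "swift/IRGen/IRGenSILPasses.h"
    #include "swift/SILOptimizer/PassManager/PassManager.h"
    #include "swift/SILOptimizer/PassManager/PassPipeline.h"

    using namespace swift;

    // Illustrative helper: run the IRGenPrepare pipeline over a SIL module.
    // The pass manager only knows the PassKind; the transform object itself
    // must be created by IRGen and registered up front.
    static void runIRGenPreparePasses(SILModule *SILMod) {
      // createIRGenModule returns an (IRGenerator *, IRGenModule *) pair.
      auto IRGenPair = irgen::createIRGenModule(SILMod, getGlobalLLVMContext());
      {
        // Scope the pass manager: its destructor asserts that every
        // registered IRGen pass was consumed by the executed pipeline.
        SILPassManager PM(SILMod, IRGenPair.second);
        PM.registerIRGenPass(PassKind::AllocStackHoisting,
                             irgen::createAllocStackHoisting());
        PM.executePassPipelinePlan(
            SILPassPipelinePlan::getIRGenPreparePassPipeline());
      }
      irgen::deleteIRGenModule(IRGenPair);
    }

Note the ownership handoff in this design: registerIRGenPass stores the transform in the IRGenPasses map, and addPass moves it into the regular Transformations list, where the pass manager destructor frees it like any other pass.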