From 9722578df69e886006db241f665f9ced03f40b33 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Tue, 19 May 2020 20:17:36 +0200
Subject: [PATCH 01/10] SILOptimizer: a new optimization for copy-on-write

Constant folds the uniqueness result of begin_cow_mutation instructions, if it can be proved that the buffer argument is uniquely referenced.
For example:

     %buffer = end_cow_mutation %mutable_buffer
     // ...
     // %buffer does not escape here
     // ...
     (%is_unique, %mutable_buffer2) = begin_cow_mutation %buffer
     cond_br %is_unique, ...

is replaced with

     %buffer = end_cow_mutation [keep_unique] %mutable_buffer
     // ...
     (%not_used, %mutable_buffer2) = begin_cow_mutation %buffer
     %true = integer_literal 1
     cond_br %true, ...

Note that the keep_unique flag is set on the end_cow_mutation because the code now relies on the buffer really being uniquely referenced.

The optimization can also handle def-use chains between end_cow_mutation and begin_cow_mutation which involve phi-arguments.

An additional peephole optimization is performed: if the begin_cow_mutation is the only use of the end_cow_mutation, the whole pair of instructions is eliminated.
---
 .../swift/SILOptimizer/PassManager/Passes.def |   2 +
 lib/SILOptimizer/PassManager/PassPipeline.cpp |   2 +
 lib/SILOptimizer/Transforms/CMakeLists.txt    |   1 +
 lib/SILOptimizer/Transforms/COWOpts.cpp       | 277 ++++++++++++++++++
 test/SILOptimizer/cow_opts.sil                | 158 ++++++++++
 5 files changed, 440 insertions(+)
 create mode 100644 lib/SILOptimizer/Transforms/COWOpts.cpp
 create mode 100644 test/SILOptimizer/cow_opts.sil

diff --git a/include/swift/SILOptimizer/PassManager/Passes.def b/include/swift/SILOptimizer/PassManager/Passes.def
index 3b2c9eacde8e6..b18748cb603d0 100644
--- a/include/swift/SILOptimizer/PassManager/Passes.def
+++ b/include/swift/SILOptimizer/PassManager/Passes.def
@@ -120,6 +120,8 @@ PASS(CopyForwarding, "copy-forwarding",
      "Copy Forwarding to Remove Redundant Copies")
 PASS(CopyPropagation, "copy-propagation",
      "Copy propagation to Remove Redundant SSA Copies")
+PASS(COWOpts, "cow-opts",
+     "Optimize COW operations")
 PASS(Differentiation, "differentiation",
      "Automatic Differentiation")
 PASS(EpilogueARCMatcherDumper, "sil-epilogue-arc-dumper",
diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp
index ef5d63181638a..259024fe49f22 100644
--- a/lib/SILOptimizer/PassManager/PassPipeline.cpp
+++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp
@@ -366,6 +366,7 @@ void addFunctionPasses(SILPassPipelinePlan &P,
     P.addRedundantLoadElimination();
   }
 
+  P.addCOWOpts();
   P.addPerformanceConstantPropagation();
   // Remove redundant arguments right before CSE and DCE, so that CSE and DCE
   // can cleanup redundant and dead instructions.
@@ -595,6 +596,7 @@ static void addLateLoopOptPassPipeline(SILPassPipelinePlan &P) {
   P.addAccessEnforcementReleaseSinking();
   P.addAccessEnforcementOpts();
   P.addLICM();
+  P.addCOWOpts();
   // Simplify CFG after LICM that creates new exit blocks
   P.addSimplifyCFG();
   // LICM might have added new merging potential by hoisting
diff --git a/lib/SILOptimizer/Transforms/CMakeLists.txt b/lib/SILOptimizer/Transforms/CMakeLists.txt
index 30868a98f205a..b5cfb49ac39d8 100644
--- a/lib/SILOptimizer/Transforms/CMakeLists.txt
+++ b/lib/SILOptimizer/Transforms/CMakeLists.txt
@@ -8,6 +8,7 @@ target_sources(swiftSILOptimizer PRIVATE
   ArrayCountPropagation.cpp
   ArrayElementValuePropagation.cpp
   AssumeSingleThreaded.cpp
+  COWOpts.cpp
   CSE.cpp
   ConditionForwarding.cpp
   CopyForwarding.cpp
diff --git a/lib/SILOptimizer/Transforms/COWOpts.cpp b/lib/SILOptimizer/Transforms/COWOpts.cpp
new file mode 100644
index 0000000000000..de13369819e7f
--- /dev/null
+++ b/lib/SILOptimizer/Transforms/COWOpts.cpp
@@ -0,0 +1,277 @@
+//===--- COWOpts.cpp - Optimize COW operations ----------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes begin_cow_mutation and end_cow_mutation patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cow-opts"
+#include "swift/SILOptimizer/PassManager/Transforms.h"
+#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
+#include "swift/SIL/SILFunction.h"
+#include "swift/SIL/SILBasicBlock.h"
+#include "swift/SIL/SILArgument.h"
+#include "swift/SIL/SILBuilder.h"
+#include "llvm/Support/Debug.h"
+
+using namespace swift;
+
+namespace {
+
+/// Constant folds the uniqueness result of begin_cow_mutation instructions.
+///
+/// If it can be proved that the buffer argument is uniquely referenced, the
+/// uniqueness result is replaced with a constant boolean "true".
+/// For example:
+///
+/// \code
+///     %buffer = end_cow_mutation %mutable_buffer
+///     // ...
+///     // %buffer does not escape here
+///     // ...
+///     (%is_unique, %mutable_buffer2) = begin_cow_mutation %buffer
+///     cond_br %is_unique, ...
+/// \endcode
+///
+/// is replaced with
+///
+/// \code
+///     %buffer = end_cow_mutation [keep_unique] %mutable_buffer
+///     // ...
+///     (%not_used, %mutable_buffer2) = begin_cow_mutation %buffer
+///     %true = integer_literal 1
+///     cond_br %true, ...
+/// \endcode
+///
+/// Note that the keep_unique flag is set on the end_cow_mutation because the
+/// code now relies on the buffer really being uniquely referenced.
+///
+/// The optimization can also handle def-use chains between end_cow_mutation and
+/// begin_cow_mutation which involve phi-arguments.
+///
+/// An additional peephole optimization is performed: if the begin_cow_mutation
+/// is the only use of the end_cow_mutation, the whole pair of instructions
+/// is eliminated.
+///
+class COWOptsPass : public SILFunctionTransform {
+public:
+  COWOptsPass() {}
+  /// Visits every begin_cow_mutation in the function and tries to fold it.
+  void run() override;
+
+private:
+  using InstructionSet = SmallPtrSet<SILInstruction *, 8>;
+  using VoidPointerSet = SmallPtrSet<void *, 8>;
+  /// Alias analysis, fetched in run(); used to check loads against stores.
+  AliasAnalysis *AA = nullptr;
+  /// Tries to constant fold the uniqueness result of \p BCM; true on change.
+  bool optimizeBeginCOW(BeginCOWMutationInst *BCM);
+  /// Collects users of \p v which are potential escape points of the buffer.
+  static void collectEscapePoints(SILValue v,
+                                  InstructionSet &escapePoints,
+                                  VoidPointerSet &handled);
+};
+
+/// Tries to optimize every begin_cow_mutation of the current function.
+void COWOptsPass::run() {
+  SILFunction *F = getFunction();
+  if (!F->shouldOptimize())
+    return;
+
+  LLVM_DEBUG(llvm::dbgs() << "*** COWOpts on function: "
+                          << F->getName() << " ***\n");
+
+  AA = PM->getAnalysis<AliasAnalysis>();
+
+  bool changed = false;
+  for (SILBasicBlock &block : *F) {
+    auto iter = block.begin();
+    while (iter != block.end()) {
+      // Advance first: optimizeBeginCOW may erase the begin_cow_mutation.
+      SILInstruction *inst = &*iter++;
+      if (auto *beginCOW = dyn_cast<BeginCOWMutationInst>(inst))
+        changed |= optimizeBeginCOW(beginCOW);
+    }
+  }
+  if (changed)
+    invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
+}
+
+bool COWOptsPass::optimizeBeginCOW(BeginCOWMutationInst *BCM) {
+  VoidPointerSet handled;
+  SmallVector<SILValue, 8> workList;
+  SmallPtrSet<EndCOWMutationInst *, 4> endCOWMutationInsts;
+
+  // Collect all end_cow_mutation instructions, used by the begin_cow_mutation,
+  // looking through block phi-arguments.
+  workList.push_back(BCM->getOperand());
+  while (!workList.empty()) {
+    SILValue v = workList.pop_back_val();
+    if (SILPhiArgument *arg = dyn_cast<SILPhiArgument>(v)) {
+      if (handled.insert(arg).second) {
+        SmallVector<SILValue, 4> incomingVals;
+        if (!arg->getIncomingPhiValues(incomingVals))
+          return false;
+        for (SILValue incomingVal : incomingVals) {
+          workList.push_back(incomingVal);
+        }
+      }
+    } else if (auto *ECM = dyn_cast<EndCOWMutationInst>(v)) {
+      endCOWMutationInsts.insert(ECM);
+    } else {
+      return false; // Neither a phi-argument nor an end_cow_mutation.
+    }
+  }
+
+  // Collect all uses of the end_cow_mutation instructions, where the buffer
+  // can potentially escape.
+  handled.clear();
+  InstructionSet potentialEscapePoints;
+  for (EndCOWMutationInst *ECM : endCOWMutationInsts) {
+    collectEscapePoints(ECM, potentialEscapePoints, handled);
+  }
+
+  if (!potentialEscapePoints.empty()) {
+    // Now, this is the complicated part: check if there is an escape point
+    // within the liverange between the end_cow_mutation(s) and
+    // begin_cow_mutation.
+    //
+    // For store instructions we do a little bit more: only count a store as an
+    // escape if there is a (potential) load from the same address within the
+    // liverange.
+    handled.clear();
+    SmallVector<SILInstruction *, 8> instWorkList;
+    SmallVector<SILInstruction *, 8> potentialLoadInsts;
+    llvm::DenseSet<SILValue> storeAddrs;
+  
+    // This is a simple worklist-based backward dataflow analysis.
+    // Start at the initial begin_cow_mutation and go backward.
+    instWorkList.push_back(BCM);
+
+    while (!instWorkList.empty()) {
+      SILInstruction *inst = instWorkList.pop_back_val();
+      for (;;) {
+        if (potentialEscapePoints.count(inst) != 0) {
+          if (auto *store = dyn_cast<StoreInst>(inst)) {
+            // Don't immediately bail on a store instruction. Instead, remember
+            // it and check if it interferes with any (potential) load.
+            storeAddrs.insert(store->getDest());
+          } else {
+            return false; // A non-store escape point in the liverange.
+          }
+        }
+        if (inst->mayReadFromMemory())
+          potentialLoadInsts.push_back(inst);
+
+        // An end_cow_mutation marks the beginning of the liverange. It's the
+        // end point of the dataflow analysis.
+        auto *ECM = dyn_cast<EndCOWMutationInst>(inst);
+        if (ECM && endCOWMutationInsts.count(ECM) != 0)
+          break;
+        // At the start of the block: continue in not yet handled predecessors.
+        if (inst == &inst->getParent()->front()) {
+          for (SILBasicBlock *pred : inst->getParent()->getPredecessorBlocks()) {
+            if (handled.insert(pred).second)
+              instWorkList.push_back(pred->getTerminator());
+          }
+          break;
+        }
+
+        inst = &*std::prev(inst->getIterator());
+      }
+    }
+    
+    // Check if there is any (potential) load from a memory location where the
+    // buffer is stored to.
+    if (!storeAddrs.empty()) {
+      // Avoid quadratic behavior. Usually this limit is not exceeded.
+      if (storeAddrs.size() * potentialLoadInsts.size() > 128)
+        return false;
+      for (SILInstruction *load : potentialLoadInsts) {
+        for (SILValue storeAddr : storeAddrs) {
+          if (!AA || AA->mayReadFromMemory(load, storeAddr))
+            return false;
+        }
+      }
+    }
+  }
+
+  // Replace the uniqueness result of the begin_cow_mutation with an integer
+  // literal of "true".
+  SILBuilderWithScope B(BCM);
+  auto *IL = B.createIntegerLiteral(BCM->getLoc(),
+                                    BCM->getUniquenessResult()->getType(), 1);
+  BCM->getUniquenessResult()->replaceAllUsesWith(IL);
+  
+  // Try the peephole optimization: remove an end_cow_mutation/begin_cow_mutation
+  // pair completely if the begin_cow_mutation is the only use of
+  // end_cow_mutation.
+  if (auto *singleEndCOW = dyn_cast<EndCOWMutationInst>(BCM->getOperand())) {
+    assert(endCOWMutationInsts.size() == 1 &&
+           *endCOWMutationInsts.begin() == singleEndCOW);
+    if (singleEndCOW->hasOneUse()) {
+      BCM->getBufferResult()->replaceAllUsesWith(singleEndCOW->getOperand());
+      BCM->eraseFromParent();
+      singleEndCOW->eraseFromParent();
+      return true;
+    }
+  }
+
+  for (EndCOWMutationInst *ECM : endCOWMutationInsts) {
+    // This is important for other optimizations: The code is now relying on
+    // the buffer to be unique.
+    ECM->setKeepUnique();
+  }
+
+  return true;
+}
+
+void COWOptsPass::collectEscapePoints(SILValue v,
+                                      InstructionSet &escapePoints,
+                                      VoidPointerSet &handled) {
+  if (!handled.insert(v.getOpaqueValue()).second)
+    return; // This value was already visited.
+
+  for (Operand *use : v->getUses()) {
+    SILInstruction *user = use->getUser();
+    switch (user->getKind()) {
+      case SILInstructionKind::BeginCOWMutationInst:
+      case SILInstructionKind::RefElementAddrInst:
+      case SILInstructionKind::RefTailAddrInst:
+        break; // Not treated as escape points.
+      case SILInstructionKind::BranchInst: // Continue with the block argument.
+        collectEscapePoints(cast<BranchInst>(user)->getArgForOperand(use),
+                            escapePoints, handled);
+        break;
+      case SILInstructionKind::CondBranchInst: // Same as for BranchInst.
+        collectEscapePoints(cast<CondBranchInst>(user)->getArgForOperand(use),
+                            escapePoints, handled);
+        break;
+      case SILInstructionKind::StructInst:
+      case SILInstructionKind::TupleInst:
+      case SILInstructionKind::UncheckedRefCastInst: // Continue with the result.
+        collectEscapePoints(cast<SingleValueInstruction>(user),
+                            escapePoints, handled);
+        break;
+      default:
+        // Everything else is considered to be a potential escape of the buffer.
+        escapePoints.insert(user);
+    }
+  }
+}
+
+} // end anonymous namespace
+
+SILTransform *swift::createCOWOpts() {
+  return new COWOptsPass(); // presumably the PASS(COWOpts) factory - confirm
+}
+
diff --git a/test/SILOptimizer/cow_opts.sil b/test/SILOptimizer/cow_opts.sil
new file mode 100644
index 0000000000000..c3b8d9d8d8208
--- /dev/null
+++ b/test/SILOptimizer/cow_opts.sil
@@ -0,0 +1,158 @@
+// RUN: %target-sil-opt %s -cow-opts | %FileCheck %s
+
+sil_stage canonical
+
+import Builtin
+import Swift
+import SwiftShims
+
+final class Buffer {
+  @_hasStorage var i: Int { get set }
+  init()
+}
+
+sil @unknown : $@convention(thin) (@guaranteed Buffer) -> ()
+
+// CHECK-LABEL: sil @test_complete_removal
+// CHECK:   [[I:%[0-9]+]] = integer_literal $Builtin.Int1, -1
+// CHECK:   [[T:%[0-9]+]] = tuple ([[I]] : $Builtin.Int1, %0 : $Buffer)
+// CHECK:   return [[T]]
+// CHECK: } // end sil function 'test_complete_removal'
+sil @test_complete_removal : $@convention(thin) (@owned Buffer) -> (Builtin.Int1, @owned Buffer) {
+bb0(%0 : $Buffer):
+  %e = end_cow_mutation %0 : $Buffer
+  (%u, %b) = begin_cow_mutation %e : $Buffer
+  %t = tuple (%u : $Builtin.Int1, %b : $Buffer)
+  return %t : $(Builtin.Int1, Buffer)
+}
+
+// CHECK-LABEL: sil @test_simple
+// CHECK:   [[I:%[0-9]+]] = integer_literal $Builtin.Int1, -1
+// CHECK:   ({{.*}}, [[B:%[0-9]+]]) = begin_cow_mutation
+// CHECK:   [[T:%[0-9]+]] = tuple ({{.*}}, [[I]] : $Builtin.Int1, [[B]] : $Buffer)
+// CHECK:   return [[T]]
+// CHECK: } // end sil function 'test_simple'
+sil @test_simple : $@convention(thin) (@owned Buffer) -> (Int, Builtin.Int1, @owned Buffer) {
+bb0(%0 : $Buffer):
+  %e = end_cow_mutation %0 : $Buffer
+  %addr = ref_element_addr [immutable] %e : $Buffer, #Buffer.i
+  %i = load %addr : $*Int
+  (%u, %b) = begin_cow_mutation %e : $Buffer
+  %t = tuple (%i : $Int, %u : $Builtin.Int1, %b : $Buffer)
+  return %t : $(Int, Builtin.Int1, Buffer)
+}
+
+// CHECK-LABEL: sil @test_store
+// CHECK:   end_cow_mutation
+// CHECK:   [[I:%[0-9]+]] = integer_literal $Builtin.Int1, -1
+// CHECK:   begin_cow_mutation
+// CHECK:   return [[I]]
+// CHECK: } // end sil function 'test_store'
+sil @test_store : $@convention(thin) (@inout Buffer) -> Builtin.Int1 {
+bb0(%0 : $*Buffer):
+  %l = load %0 : $*Buffer
+  %e = end_cow_mutation %l : $Buffer
+  store %e to %0 : $*Buffer
+  (%u, %b) = begin_cow_mutation %e : $Buffer
+  store %b to %0 : $*Buffer
+  return %u : $Builtin.Int1
+}
+
+// CHECK-LABEL: sil @test_store_and_load
+// CHECK:   end_cow_mutation
+// CHECK:   ([[U:%[0-9]+]], {{.*}}) = begin_cow_mutation
+// CHECK:   return [[U]]
+// CHECK: } // end sil function 'test_store_and_load'
+sil @test_store_and_load : $@convention(thin) (@inout Buffer) -> Builtin.Int1 {
+bb0(%0 : $*Buffer):
+  %l = load %0 : $*Buffer
+  %e = end_cow_mutation %l : $Buffer
+  store %e to %0 : $*Buffer
+  %l2 = load %0 : $*Buffer
+  %f = function_ref @unknown : $@convention(thin) (@guaranteed Buffer) -> ()
+  apply %f(%l2) : $@convention(thin) (@guaranteed Buffer) -> ()
+  (%u, %b) = begin_cow_mutation %e : $Buffer
+  store %b to %0 : $*Buffer
+  return %u : $Builtin.Int1
+}
+
+// CHECK-LABEL: sil @test_store_and_load_outside_liverange
+// CHECK:   end_cow_mutation
+// CHECK:   [[I:%[0-9]+]] = integer_literal $Builtin.Int1, -1
+// CHECK:   begin_cow_mutation
+// CHECK:   return [[I]]
+// CHECK: } // end sil function 'test_store_and_load_outside_liverange'
+sil @test_store_and_load_outside_liverange : $@convention(thin) (@inout Buffer) -> Builtin.Int1 {
+bb0(%0 : $*Buffer):
+  %l = load %0 : $*Buffer
+  %e = end_cow_mutation %l : $Buffer
+  store %e to %0 : $*Buffer
+  (%u, %b) = begin_cow_mutation %e : $Buffer
+  %l2 = load %0 : $*Buffer
+  %f = function_ref @unknown : $@convention(thin) (@guaranteed Buffer) -> ()
+  apply %f(%l2) : $@convention(thin) (@guaranteed Buffer) -> ()
+  store %b to %0 : $*Buffer
+  return %u : $Builtin.Int1
+}
+
+// CHECK-LABEL: sil @test_loop
+// CHECK:   [[I:%[0-9]+]] = integer_literal $Builtin.Int1, -1
+// CHECK:   [[B:%[0-9]+]] = end_cow_mutation
+// CHECK:   [[T:%[0-9]+]] = tuple ([[I]] : $Builtin.Int1, [[B]] : $Buffer)
+// CHECK:   return [[T]]
+// CHECK: } // end sil function 'test_loop'
+sil @test_loop : $@convention(thin) (@owned Buffer) -> (Builtin.Int1, @owned Buffer) {
+bb0(%0 : $Buffer):
+  %e = end_cow_mutation %0 : $Buffer
+  br bb1(%e : $Buffer)
+bb1(%a : $Buffer):
+  (%u, %b) = begin_cow_mutation %a : $Buffer
+  %e2 = end_cow_mutation %b : $Buffer
+  cond_br undef, bb1(%e2 : $Buffer), bb2
+bb2:
+  %t = tuple (%u : $Builtin.Int1, %e2 : $Buffer)
+  return %t : $(Builtin.Int1, Buffer)
+}
+
+// CHECK-LABEL: sil @test_escape_in_loop
+// CHECK:   ([[U:%[0-9]+]], {{.*}}) = begin_cow_mutation
+// CHECK:   [[B:%[0-9]+]] = end_cow_mutation
+// CHECK:   [[T:%[0-9]+]] = tuple ([[U]] : $Builtin.Int1, [[B]] : $Buffer)
+// CHECK:   return [[T]]
+// CHECK: } // end sil function 'test_escape_in_loop'
+sil @test_escape_in_loop : $@convention(thin) (@owned Buffer) -> (Builtin.Int1, @owned Buffer) {
+bb0(%0 : $Buffer):
+  %f = function_ref @unknown : $@convention(thin) (@guaranteed Buffer) -> ()
+  %e = end_cow_mutation %0 : $Buffer
+  br bb1(%e : $Buffer)
+bb1(%a : $Buffer):
+  (%u, %b) = begin_cow_mutation %a : $Buffer
+  %e2 = end_cow_mutation %b : $Buffer
+  apply %f(%e2) : $@convention(thin) (@guaranteed Buffer) -> ()
+  cond_br undef, bb1(%e2 : $Buffer), bb2
+bb2:
+  %t = tuple (%u : $Builtin.Int1, %e2 : $Buffer)
+  return %t : $(Builtin.Int1, Buffer)
+}
+
+// CHECK-LABEL: sil @test_escape_outside_loop
+// CHECK:   [[I:%[0-9]+]] = integer_literal $Builtin.Int1, -1
+// CHECK:   [[B:%[0-9]+]] = end_cow_mutation
+// CHECK:   [[T:%[0-9]+]] = tuple ([[I]] : $Builtin.Int1, [[B]] : $Buffer)
+// CHECK:   return [[T]]
+// CHECK: } // end sil function 'test_escape_outside_loop'
+sil @test_escape_outside_loop : $@convention(thin) (@owned Buffer) -> (Builtin.Int1, @owned Buffer) {
+bb0(%0 : $Buffer):
+  %f = function_ref @unknown : $@convention(thin) (@guaranteed Buffer) -> ()
+  %e = end_cow_mutation %0 : $Buffer
+  br bb1(%e : $Buffer)
+bb1(%a : $Buffer):
+  (%u, %b) = begin_cow_mutation %a : $Buffer
+  %e2 = end_cow_mutation %b : $Buffer
+  cond_br undef, bb1(%e2 : $Buffer), bb2
+bb2:
+  apply %f(%e2) : $@convention(thin) (@guaranteed Buffer) -> ()
+  %t = tuple (%u : $Builtin.Int1, %e2 : $Buffer)
+  return %t : $(Builtin.Int1, Buffer)
+}
+

From e3ac98cdbfd33c2223fa4c098dcf0bd0a7ba0b73 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 14:53:11 +0200
Subject: [PATCH 02/10] FunctionSignatureTransforms: add a semantics attribute
 to prevent the owned-to-guarantee transformation.

If a function is annotated with @_semantics("optimize.sil.specialize.owned2guarantee.never") its arguments will not be converted from owned to guaranteed.
---
 include/swift/AST/SemanticAttrs.def                           | 2 ++
 .../OwnedToGuaranteedTransform.cpp                            | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/include/swift/AST/SemanticAttrs.def b/include/swift/AST/SemanticAttrs.def
index 81615e37d9c10..17994b7ddc319 100644
--- a/include/swift/AST/SemanticAttrs.def
+++ b/include/swift/AST/SemanticAttrs.def
@@ -67,6 +67,8 @@ SEMANTICS_ATTR(OPTIMIZE_SIL_SPECIALIZE_GENERIC_PARTIAL_NEVER,
           "optimize.sil.specialize.generic.partial.never")
 SEMANTICS_ATTR(OPTIMIZE_SIL_SPECIALIZE_GENERIC_SIZE_NEVER,
           "optimize.sil.specialize.generic.size.never")
+SEMANTICS_ATTR(OPTIMIZE_SIL_SPECIALIZE_OWNED2GUARANTEE_NEVER,
+          "optimize.sil.specialize.owned2guarantee.never")
 
 SEMANTICS_ATTR(OSLOG_MESSAGE_INIT_INTERPOLATION, "oslog.message.init_interpolation")
 SEMANTICS_ATTR(OSLOG_MESSAGE_INIT_STRING_LITERAL, "oslog.message.init_stringliteral")
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/OwnedToGuaranteedTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/OwnedToGuaranteedTransform.cpp
index f0e927613920a..e0b8f831b40c5 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/OwnedToGuaranteedTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/OwnedToGuaranteedTransform.cpp
@@ -13,6 +13,7 @@
 #define DEBUG_TYPE "fso-owned-to-guaranteed-transform"
 #include "FunctionSignatureOpts.h"
 #include "swift/SIL/DebugUtils.h"
+#include "swift/AST/SemanticAttrs.h"
 #include "llvm/Support/CommandLine.h"
 
 using namespace swift;
@@ -258,6 +259,9 @@ void FunctionSignatureTransform::OwnedToGuaranteedAddResultRelease(
 bool FunctionSignatureTransform::OwnedToGuaranteedAnalyze() {
   if (FSODisableOwnedToGuaranteed)
     return false;
+  SILFunction *F = TransformDescriptor.OriginalFunction;
+  if (F->hasSemanticsAttr(semantics::OPTIMIZE_SIL_SPECIALIZE_OWNED2GUARANTEE_NEVER))
+    return false;
 
   const bool Result = OwnedToGuaranteedAnalyzeResults();
   const bool Params = OwnedToGuaranteedAnalyzeParameters();

From 4ca6b31500f931f8aa5d1772d8f3b99c09faf1eb Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 15:25:57 +0200
Subject: [PATCH 03/10] SILCombine: remove dead casts and end_cow_mutation
 instructions which are only destroyed.

If the only uses of an upcast, unchecked_ref_cast or end_cow_mutation are destroys/releases (or retains), just apply them to the operand and remove the cast/end_cow_mutation.
---
 .../Analysis/SimplifyInstruction.cpp          | 27 ++++++++-
 test/IRGen/upcast.sil                         | 13 +++--
 test/SILOptimizer/cse.sil                     | 32 +++++------
 test/SILOptimizer/sil_combine.sil             | 55 +++++++++++++++----
 4 files changed, 89 insertions(+), 38 deletions(-)

diff --git a/lib/SILOptimizer/Analysis/SimplifyInstruction.cpp b/lib/SILOptimizer/Analysis/SimplifyInstruction.cpp
index 4c67b9d6fd060..c7be441ba87d5 100644
--- a/lib/SILOptimizer/Analysis/SimplifyInstruction.cpp
+++ b/lib/SILOptimizer/Analysis/SimplifyInstruction.cpp
@@ -62,6 +62,7 @@ namespace {
     SILValue visitUncheckedBitwiseCastInst(UncheckedBitwiseCastInst *UBCI);
     SILValue
     visitUncheckedTrivialBitCastInst(UncheckedTrivialBitCastInst *UTBCI);
+    SILValue visitEndCOWMutationInst(EndCOWMutationInst *ECM);
     SILValue visitThinFunctionToPointerInst(ThinFunctionToPointerInst *TFTPI);
     SILValue visitPointerToThinFunctionInst(PointerToThinFunctionInst *PTTFI);
     SILValue visitBeginAccessInst(BeginAccessInst *BAI);
@@ -329,6 +330,21 @@ visitUnconditionalCheckedCastInst(UnconditionalCheckedCastInst *UCCI) {
   return SILValue();
 }
 
+/// If a cast is only used by destroys/releases/retains, return its operand.
+static SILValue simplifyDeadCast(SingleValueInstruction *Cast) {
+  for (Operand *op : Cast->getUses()) {
+    switch (op->getUser()->getKind()) {
+      case SILInstructionKind::DestroyValueInst:
+      case SILInstructionKind::StrongReleaseInst:
+      case SILInstructionKind::StrongRetainInst:
+        break; // Accepted user; keep checking the remaining uses.
+      default:
+        return SILValue(); // Any other user: no simplification.
+    }
+  }
+  return Cast->getOperand(0);
+}
+
 SILValue
 InstSimplifier::
 visitUncheckedRefCastInst(UncheckedRefCastInst *OPRI) {
@@ -351,7 +367,8 @@ visitUncheckedRefCastInst(UncheckedRefCastInst *OPRI) {
   if (OPRI->getOperand()->getType() == OPRI->getType())
     return OPRI->getOperand();
 
-  return SILValue();
+  // (destroy_value (unchecked_ref_cast x)) -> destroy_value x
+  return simplifyDeadCast(OPRI);
 }
 
 SILValue
@@ -375,7 +392,8 @@ SILValue InstSimplifier::visitUpcastInst(UpcastInst *UI) {
     if (URCI->getOperand()->getType() == UI->getType())
       return URCI->getOperand();
 
-  return SILValue();
+  // (destroy_value (upcast x)) -> destroy_value x
+  return simplifyDeadCast(UI);
 }
 
 #define LOADABLE_REF_STORAGE(Name, ...) \
@@ -410,6 +428,11 @@ visitUncheckedTrivialBitCastInst(UncheckedTrivialBitCastInst *UTBCI) {
   return SILValue();
 }
 
+SILValue InstSimplifier::visitEndCOWMutationInst(EndCOWMutationInst *ECM) {
+  // (destroy_value (end_cow_mutation x)) -> destroy_value x
+  return simplifyDeadCast(ECM); // Also handles strong_release/strong_retain.
+}
+
 SILValue
 InstSimplifier::
 visitUncheckedBitwiseCastInst(UncheckedBitwiseCastInst *UBCI) {
diff --git a/test/IRGen/upcast.sil b/test/IRGen/upcast.sil
index 365ea261e1985..809f696179ad4 100644
--- a/test/IRGen/upcast.sil
+++ b/test/IRGen/upcast.sil
@@ -2,19 +2,20 @@
 
 // Make sure that we are able to lower upcast addresses.
 
-// CHECK-LABEL: define{{( dllexport)?}}{{( protected)?}} swiftcc void @upcast_test(%T6upcast1DC** nocapture dereferenceable({{.*}}) %0) {{.*}} {
+// CHECK-LABEL: define{{( dllexport)?}}{{( protected)?}} swiftcc %T6upcast1CC* @upcast_test(%T6upcast1DC** nocapture dereferenceable({{.*}}) %0) {{.*}} {
 // CHECK: entry:
-// CHECK-NEXT: bitcast %T6upcast1DC** {{%[0-0]+}} to %T6upcast1CC**
-// CHECK-NEXT: ret void
+// CHECK-NEXT: [[A:%[0-9]+]] = bitcast %T6upcast1DC** {{%[0-0]+}} to %T6upcast1CC**
+// CHECK-NEXT: [[C:%[0-9]+]] = load %T6upcast1CC*, %T6upcast1CC** [[A]]
+// CHECK-NEXT: ret %T6upcast1CC* [[C]]
 
 class C {}
 sil_vtable C {}
 class D : C {}
 sil_vtable D {}
 
-sil @upcast_test : $@convention(thin) (@inout D) -> () {
+sil @upcast_test : $@convention(thin) (@inout D) -> C {
 bb0(%0 : $*D):
   %1 = upcast %0 : $*D to $*C
-  %33 = tuple()
-  return %33 : $()
+  %2 = load %1 : $*C
+  return %2 : $C
 }
diff --git a/test/SILOptimizer/cse.sil b/test/SILOptimizer/cse.sil
index 410d517635f16..52f65b49001b3 100644
--- a/test/SILOptimizer/cse.sil
+++ b/test/SILOptimizer/cse.sil
@@ -617,21 +617,18 @@ bb0(%0 : $FakeOptional):
 class C {}
 class D : C { }
 
-// CHECK-LABEL: sil @test1cse : $@convention(thin) (C) -> () {
-// CHECK: unchecked_ref_cast
+// CHECK-LABEL: sil @test1cse
+// CHECK: [[C:%[0-9]+]] = unchecked_ref_cast
 // CHECK-NOT: unchecked_ref_cast
-// CHECK: strong_release
-// CHECK: strong_release
-// CHECK: return
-sil @test1cse : $@convention(thin) (C) -> () {
+// CHECK: [[T:%[0-9]+]] = tuple ([[C]] : $Builtin.NativeObject, [[C]] : $Builtin.NativeObject)
+// CHECK: return [[T]]
+sil @test1cse : $@convention(thin) (C) -> (Builtin.NativeObject, Builtin.NativeObject) {
 bb0(%0 : $C):
   strong_retain %0 : $C
   %1 = unchecked_ref_cast %0 : $C to $Builtin.NativeObject
   %2 = unchecked_ref_cast %0 : $C to $Builtin.NativeObject
-  strong_release %1 : $Builtin.NativeObject
-  strong_release %2 : $Builtin.NativeObject
-  %5 = tuple()
-  return %5 : $()
+  %5 = tuple(%1 : $Builtin.NativeObject, %2 : $Builtin.NativeObject)
+  return %5 : $(Builtin.NativeObject, Builtin.NativeObject)
 }
 
 // CHECK-LABEL: sil @test2cse : $@convention(thin) (C) -> () {
@@ -704,23 +701,20 @@ bb0(%0 : $*Builtin.Int8):
 }
 
 // CHECK-LABEL: sil @cse_unchecked_ref_cast
-// CHECK: unchecked_ref_cast
+// CHECK: [[C:%[0-9]+]] = unchecked_ref_cast
 // CHECK-NOT: unchecked_ref_cast
-// CHECK: strong_release
-// CHECK: strong_release
-// CHECK: return
-sil @cse_unchecked_ref_cast : $@convention(thin) (@owned B, Builtin.Int1) -> () {
+// CHECK: [[T:%[0-9]+]] = tuple ([[C]] : $Builtin.NativeObject, [[C]] : $Builtin.NativeObject)
+// CHECK: return [[T]]
+sil @cse_unchecked_ref_cast : $@convention(thin) (@owned B, Builtin.Int1) -> (Builtin.NativeObject, Builtin.NativeObject) {
 bb0(%0: $B, %1: $Builtin.Int1):
   %5 = unchecked_ref_cast %0 : $B to $Builtin.NativeObject
-  strong_release %5 : $Builtin.NativeObject
   cond_br %1, bb1, bb2
 bb1:
   br bb2
 bb2:
   %21 = unchecked_ref_cast %0 : $B to $Builtin.NativeObject
-  strong_release %21 : $Builtin.NativeObject
-  %32 = tuple ()
-  return %32 : $()
+  %32 = tuple(%5 : $Builtin.NativeObject, %21 : $Builtin.NativeObject)
+  return %32 : $(Builtin.NativeObject, Builtin.NativeObject)
 }
 
 // CHECK-LABEL: sil @cse_raw_pointer_to_ref
diff --git a/test/SILOptimizer/sil_combine.sil b/test/SILOptimizer/sil_combine.sil
index 0b28ba548b130..cfb4377c29346 100644
--- a/test/SILOptimizer/sil_combine.sil
+++ b/test/SILOptimizer/sil_combine.sil
@@ -809,6 +809,44 @@ bb0(%0 : $C3):
   return %2 : $C1
 }
 
+// CHECK-LABEL: sil @dead_upcast
+// CHECK: bb0
+// CHECK-NEXT: strong_retain %0
+// CHECK-NEXT: strong_release %0
+// CHECK-NEXT: return
+sil @dead_upcast : $@convention(thin) (C2) -> C2 {
+bb0(%0 : $C2):
+  %1 = upcast %0 : $C2 to $C1
+  strong_retain %1 : $C1
+  strong_release %0 : $C2
+  return %0 : $C2
+}
+
+// CHECK-LABEL: sil @dead_unchecked_ref_cast
+// CHECK: bb0
+// CHECK-NEXT: strong_retain %0
+// CHECK-NEXT: strong_release %0
+// CHECK-NEXT: return
+sil @dead_unchecked_ref_cast : $@convention(thin) (C1) -> C1 {
+bb0(%0 : $C1):
+  %1 = unchecked_ref_cast %0 : $C1 to $C2
+  strong_retain %1 : $C2
+  strong_release %0 : $C1
+  return %0 : $C1
+}
+
+// CHECK-LABEL: sil @dead_end_cow_mutation
+// CHECK: bb0
+// CHECK-NEXT: strong_retain %0
+// CHECK-NEXT: strong_release %0
+// CHECK-NEXT: return
+sil @dead_end_cow_mutation : $@convention(thin) (C1) -> C1 {
+bb0(%0 : $C1):
+  %1 = end_cow_mutation %0 : $C1
+  strong_retain %1 : $C1
+  strong_release %0 : $C1
+  return %0 : $C1
+}
 
 struct XS {
   var m: Int
@@ -2294,14 +2332,13 @@ bb0(%0 : $B, %1 : $@sil_unowned B, %2 : $AnyObject, %3: $@sil_unmanaged AnyObjec
 // CHECK-NOT:     open_existential_ref
 // CHECK:         unchecked_ref_cast [[Ref]]
 
-sil @collapse_existential_pack_unpack_unchecked_ref_cast : $@convention(thin) (MyClass) -> () {
+sil @collapse_existential_pack_unpack_unchecked_ref_cast : $@convention(thin) (MyClass) -> Builtin.NativeObject {
 bb0(%0: $MyClass):
   %1 = init_existential_ref %0 : $MyClass : $MyClass, $AnyObject
   %2 = open_existential_ref %1 : $AnyObject to $@opened("2CAE06CE-5F10-11E4-AF13-C82A1428F987") AnyObject
   %3 = unchecked_ref_cast %2 : $@opened("2CAE06CE-5F10-11E4-AF13-C82A1428F987") AnyObject to $Builtin.NativeObject
   strong_retain %3: $Builtin.NativeObject
-  %5 = tuple ()
-  return %5 : $()
+  return %3 : $Builtin.NativeObject
 }
 
 // CHECK-LABEL: sil @collapse_existential_pack_unpack_ref_to_raw_pointer
@@ -2531,15 +2568,12 @@ sil @alloc_ref_dynamic_with_metatype_genneric : $<T where T : B>() -> () {
 // CHECK-NOT: alloc_ref_dynamic
 // CHECK-NEXT: [[R:%[0-9]+]] = alloc_ref $E
 // CHECK-NEXT: [[C:%[0-9]+]] = upcast [[R]] : $E to $B
-// CHECK-NEXT: strong_release [[C]]
-// CHECK: return
-sil @alloc_ref_dynamic_with_upcast_metatype : $() -> () {
+// CHECK-NEXT: return [[C]]
+sil @alloc_ref_dynamic_with_upcast_metatype : $() -> B {
   %1 = metatype $@thick E.Type
   %2 = upcast %1 : $@thick E.Type to $@thick B.Type
   %3 = alloc_ref_dynamic %2 : $@thick B.Type, $B
-  strong_release %3 : $B
-  %4 = tuple()
-  return %4 : $()
+  return %3 : $B
 }
 
 // CHECK-LABEL: @alloc_ref_dynamic_after_successful_checked_cast_br
@@ -2570,8 +2604,7 @@ bb3 (%10: $Builtin.Int32):
 // CHECK: bb1
 // CHECK-NOT: alloc_ref_dynamic
 // CHECK:      [[R:%[0-9]+]] = alloc_ref $E
-// CHECK-NEXT: [[C:%[0-9]+]] = upcast [[R]] : $E to $B
-// CHECK-NEXT: strong_release [[C]]
+// CHECK-NEXT: strong_release [[R]]
 sil @alloc_ref_dynamic_upcast_after_successful_checked_cast_br : $(@thick B.Type) -> Builtin.Int32 {
 bb0(%1 : $@thick B.Type):
   checked_cast_br [exact] %1 : $@thick B.Type to E.Type, bb1, bb2

From 01465d9ba4aac774596d67e59dead3c4a95ef784 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 15:45:17 +0200
Subject: [PATCH 04/10] SILCombine: Remove a cast if it's only used by an
 end_cow_mutation.

(end_cow_mutation (upcast X)) -> (end_cow_mutation X)
(end_cow_mutation (unchecked_ref_cast X)) -> (end_cow_mutation X)
---
 lib/SILOptimizer/SILCombiner/SILCombiner.h    |  1 +
 .../SILCombiner/SILCombinerCastVisitors.cpp   | 20 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/lib/SILOptimizer/SILCombiner/SILCombiner.h b/lib/SILOptimizer/SILCombiner/SILCombiner.h
index 8a7435272e26c..508fa6e324a94 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombiner.h
+++ b/lib/SILOptimizer/SILCombiner/SILCombiner.h
@@ -183,6 +183,7 @@ class SILCombiner :
   SILInstruction *visitPointerToAddressInst(PointerToAddressInst *PTAI);
   SILInstruction *visitUncheckedAddrCastInst(UncheckedAddrCastInst *UADCI);
   SILInstruction *visitUncheckedRefCastInst(UncheckedRefCastInst *URCI);
+  SILInstruction *visitEndCOWMutationInst(EndCOWMutationInst *ECM);
   SILInstruction *visitUncheckedRefCastAddrInst(UncheckedRefCastAddrInst *URCI);
   SILInstruction *visitBridgeObjectToRefInst(BridgeObjectToRefInst *BORI);
   SILInstruction *visitUnconditionalCheckedCastInst(
diff --git a/lib/SILOptimizer/SILCombiner/SILCombinerCastVisitors.cpp b/lib/SILOptimizer/SILCombiner/SILCombinerCastVisitors.cpp
index e370a62e16e8e..86081cf98d946 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombinerCastVisitors.cpp
+++ b/lib/SILOptimizer/SILCombiner/SILCombinerCastVisitors.cpp
@@ -264,6 +264,26 @@ SILCombiner::visitUncheckedRefCastInst(UncheckedRefCastInst *URCI) {
   return nullptr;
 }
 
+SILInstruction *SILCombiner::visitEndCOWMutationInst(EndCOWMutationInst *ECM) {
+  // Sink a cast (upcast or unchecked_ref_cast) which is only used by this
+  // end_cow_mutation, so that the end_cow_mutation operates on the cast's
+  // operand directly. The cast itself is kept and re-applied to the result:
+  // (end_cow_mutation (cast X)) -> (cast (end_cow_mutation X))
+  SILValue op = ECM->getOperand();
+  if (!isa<UncheckedRefCastInst>(op) && !isa<UpcastInst>(op))
+    return nullptr;
+  if (!op->hasOneUse())
+    return nullptr;
+
+  // The new instruction must keep the [keep_unique] flag of the original one.
+  SingleValueInstruction *refCast = cast<SingleValueInstruction>(op);
+  auto *newECM = Builder.createEndCOWMutation(
+      ECM->getLoc(), refCast->getOperand(0), ECM->doKeepUnique());
+  ECM->replaceAllUsesWith(refCast);
+  refCast->setOperand(0, newECM);
+  refCast->moveAfter(newECM);
+  return eraseInstFromFunction(*ECM);
+}
 
 SILInstruction *
 SILCombiner::visitBridgeObjectToRefInst(BridgeObjectToRefInst *BORI) {

From 2403e56eb5253295dc6785693b03d0b77c9d96e9 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 16:59:06 +0200
Subject: [PATCH 05/10] SIL: new "array.end_mutation" and
 "array.finalize_intrinsic" array semantics

These semantics are used to "finalize" an array literal. They are not used yet, so this change is NFC (no functional change).
Also handle the "array.finalize_intrinsic" function in various array-specific optimizations.
---
 include/swift/AST/SemanticAttrs.def           |  1 +
 .../SILOptimizer/Analysis/ArraySemantic.h     |  6 ++++-
 lib/SILOptimizer/Analysis/ArraySemantic.cpp   | 11 +++++---
 .../LoopTransforms/COWArrayOpt.cpp            |  2 ++
 .../Transforms/ArrayCountPropagation.cpp      | 19 +++++++++++---
 .../ArrayElementValuePropagation.cpp          | 26 +++++++++++--------
 .../Transforms/DeadObjectElimination.cpp      | 16 ++++++++----
 lib/SILOptimizer/Utils/ConstExpr.cpp          | 15 +++++++++++
 test/SILOptimizer/array_count_propagation.sil |  7 +++--
 .../array_element_propagation.sil             | 25 ++++++++++--------
 test/SILOptimizer/dead_array_elim.sil         |  9 ++++++-
 11 files changed, 98 insertions(+), 39 deletions(-)

diff --git a/include/swift/AST/SemanticAttrs.def b/include/swift/AST/SemanticAttrs.def
index 17994b7ddc319..944d56b9348d6 100644
--- a/include/swift/AST/SemanticAttrs.def
+++ b/include/swift/AST/SemanticAttrs.def
@@ -51,6 +51,7 @@ SEMANTICS_ATTR(ARRAY_GET_ELEMENT_ADDRESS, "array.get_element_address")
 SEMANTICS_ATTR(ARRAY_INIT, "array.init")
 SEMANTICS_ATTR(ARRAY_INIT_EMPTY, "array.init.empty")
 SEMANTICS_ATTR(ARRAY_MAKE_MUTABLE, "array.make_mutable")
+SEMANTICS_ATTR(ARRAY_END_MUTATION, "array.end_mutation")
 SEMANTICS_ATTR(ARRAY_MUTATE_UNKNOWN, "array.mutate_unknown")
 SEMANTICS_ATTR(ARRAY_PROPS_IS_NATIVE_TYPE_CHECKED, "array.props.isNativeTypeChecked")
 SEMANTICS_ATTR(ARRAY_RESERVE_CAPACITY_FOR_APPEND, "array.reserve_capacity_for_append")
diff --git a/include/swift/SILOptimizer/Analysis/ArraySemantic.h b/include/swift/SILOptimizer/Analysis/ArraySemantic.h
index 0d6c51b729c98..67b7156804793 100644
--- a/include/swift/SILOptimizer/Analysis/ArraySemantic.h
+++ b/include/swift/SILOptimizer/Analysis/ArraySemantic.h
@@ -31,6 +31,7 @@ enum class ArrayCallKind {
   kGetElement,
   kGetElementAddress,
   kMakeMutable,
+  kEndMutation,
   kMutateUnknown,
   kReserveCapacityForAppend,
   kWithUnsafeMutableBufferPointer,
@@ -42,7 +43,8 @@ enum class ArrayCallKind {
   // before this comment.
   kArrayInit,
   kArrayUninitialized,
-  kArrayUninitializedIntrinsic
+  kArrayUninitializedIntrinsic,
+  kArrayFinalizeIntrinsic
 };
 
 /// Return true is the given function is an array semantics call.
@@ -78,6 +80,8 @@ class ArraySemanticsCall {
   ArraySemanticsCall(SILValue V, StringRef semanticName,
                      bool matchPartialName);
 
+  ArraySemanticsCall() : SemanticsCall(nullptr) {}
+
   /// Can we hoist this call.
   bool canHoist(SILInstruction *To, DominanceInfo *DT) const;
 
diff --git a/lib/SILOptimizer/Analysis/ArraySemantic.cpp b/lib/SILOptimizer/Analysis/ArraySemantic.cpp
index c4d47098c2960..59ba1ba17a87f 100644
--- a/lib/SILOptimizer/Analysis/ArraySemantic.cpp
+++ b/lib/SILOptimizer/Analysis/ArraySemantic.cpp
@@ -33,12 +33,14 @@ ArrayCallKind swift::getArraySemanticsKind(SILFunction *f) {
             .StartsWith("array.init", ArrayCallKind::kArrayInit)
             .Case("array.uninitialized", ArrayCallKind::kArrayUninitialized)
             .Case("array.uninitialized_intrinsic", ArrayCallKind::kArrayUninitializedIntrinsic)
+            .Case("array.finalize_intrinsic", ArrayCallKind::kArrayFinalizeIntrinsic)
             .Case("array.check_subscript", ArrayCallKind::kCheckSubscript)
             .Case("array.check_index", ArrayCallKind::kCheckIndex)
             .Case("array.get_count", ArrayCallKind::kGetCount)
             .Case("array.get_capacity", ArrayCallKind::kGetCapacity)
             .Case("array.get_element", ArrayCallKind::kGetElement)
             .Case("array.make_mutable", ArrayCallKind::kMakeMutable)
+            .Case("array.end_mutation", ArrayCallKind::kEndMutation)
             .Case("array.get_element_address",
                   ArrayCallKind::kGetElementAddress)
             .Case("array.mutate_unknown", ArrayCallKind::kMutateUnknown)
@@ -342,9 +344,9 @@ bool swift::ArraySemanticsCall::canHoist(SILInstruction *InsertBefore,
     return canHoistArrayArgument(SemanticsCall, getSelf(), InsertBefore, DT);
   }
 
-  case ArrayCallKind::kMakeMutable: {
+  case ArrayCallKind::kMakeMutable:
+  case ArrayCallKind::kEndMutation:
     return canHoistArrayArgument(SemanticsCall, getSelf(), InsertBefore, DT);
-  }
   } // End switch.
 
   return false;
@@ -492,8 +494,8 @@ ApplyInst *swift::ArraySemanticsCall::hoistOrCopy(SILInstruction *InsertBefore,
     return Call;
   }
 
-  case ArrayCallKind::kMakeMutable: {
-    assert(!LeaveOriginal && "Copying not yet implemented");
+  case ArrayCallKind::kMakeMutable:
+  case ArrayCallKind::kEndMutation: {
     // Hoist the call.
     auto Call = hoistOrCopyCall(SemanticsCall, InsertBefore, LeaveOriginal, DT);
     return Call;
@@ -569,6 +571,7 @@ bool swift::ArraySemanticsCall::doesNotChangeArray() const {
     case ArrayCallKind::kGetCount:
     case ArrayCallKind::kGetCapacity:
     case ArrayCallKind::kGetElement:
+    case ArrayCallKind::kEndMutation:
       return true;
   }
 }
diff --git a/lib/SILOptimizer/LoopTransforms/COWArrayOpt.cpp b/lib/SILOptimizer/LoopTransforms/COWArrayOpt.cpp
index a6a0fef10d3ba..fc4ca58f5659c 100644
--- a/lib/SILOptimizer/LoopTransforms/COWArrayOpt.cpp
+++ b/lib/SILOptimizer/LoopTransforms/COWArrayOpt.cpp
@@ -288,6 +288,7 @@ static bool isNonMutatingArraySemanticCall(SILInstruction *Inst) {
   case ArrayCallKind::kGetCapacity:
   case ArrayCallKind::kGetElement:
   case ArrayCallKind::kGetElementAddress:
+  case ArrayCallKind::kEndMutation:
     return true;
   case ArrayCallKind::kMakeMutable:
   case ArrayCallKind::kMutateUnknown:
@@ -296,6 +297,7 @@ static bool isNonMutatingArraySemanticCall(SILInstruction *Inst) {
   case ArrayCallKind::kArrayInit:
   case ArrayCallKind::kArrayUninitialized:
   case ArrayCallKind::kArrayUninitializedIntrinsic:
+  case ArrayCallKind::kArrayFinalizeIntrinsic:
   case ArrayCallKind::kAppendContentsOf:
   case ArrayCallKind::kAppendElement:
     return false;
diff --git a/lib/SILOptimizer/Transforms/ArrayCountPropagation.cpp b/lib/SILOptimizer/Transforms/ArrayCountPropagation.cpp
index eb790a3dc3c0b..846037d58f657 100644
--- a/lib/SILOptimizer/Transforms/ArrayCountPropagation.cpp
+++ b/lib/SILOptimizer/Transforms/ArrayCountPropagation.cpp
@@ -135,13 +135,24 @@ bool ArrayAllocation::recursivelyCollectUses(ValueBase *Def) {
     }
 
     // Check array semantic calls.
-    if (auto apply = dyn_cast<ApplyInst>(User)) {
+    if (auto *apply = dyn_cast<ApplyInst>(User)) {
       ArraySemanticsCall ArrayOp(apply);
-      if (ArrayOp && ArrayOp.doesNotChangeArray()) {
-        if (ArrayOp.getKind() == ArrayCallKind::kGetCount)
+      switch (ArrayOp.getKind()) {
+        case ArrayCallKind::kNone:
+          return false;
+        case ArrayCallKind::kGetCount:
           CountCalls.insert(ArrayOp);
-        continue;
+          break;
+        case ArrayCallKind::kArrayFinalizeIntrinsic:
+          if (!recursivelyCollectUses(apply))
+            return false;
+          break;
+        default:
+          if (!ArrayOp.doesNotChangeArray())
+            return false;
+          break;
       }
+      continue;
     }
 
     // An operation that escapes or modifies the array value.
diff --git a/lib/SILOptimizer/Transforms/ArrayElementValuePropagation.cpp b/lib/SILOptimizer/Transforms/ArrayElementValuePropagation.cpp
index 8a24b9d41cf66..3b31bdd2edaa8 100644
--- a/lib/SILOptimizer/Transforms/ArrayElementValuePropagation.cpp
+++ b/lib/SILOptimizer/Transforms/ArrayElementValuePropagation.cpp
@@ -138,20 +138,24 @@ bool ArrayAllocation::recursivelyCollectUses(ValueBase *Def) {
 
     // Check array semantic calls.
     ArraySemanticsCall ArrayOp(User);
-    if (ArrayOp) {
-      if (ArrayOp.getKind() == ArrayCallKind::kAppendContentsOf) {
+    switch (ArrayOp.getKind()) {
+      case ArrayCallKind::kNone:
+        return false;
+      case ArrayCallKind::kAppendContentsOf:
         AppendContentsOfCalls.push_back(ArrayOp);
-        continue;
-      } else if (ArrayOp.getKind() == ArrayCallKind::kGetElement) {
+        break;
+      case ArrayCallKind::kGetElement:
         GetElementCalls.insert(ArrayOp);
-        continue;
-      } else if (ArrayOp.doesNotChangeArray()) {
-        continue;
-      }
+        break;
+      case ArrayCallKind::kArrayFinalizeIntrinsic:
+        if (!recursivelyCollectUses(cast<SingleValueInstruction>(User)))
+          return false;
+        break;
+      default:
+        if (ArrayOp.doesNotChangeArray())
+          break;
+        return false;
     }
-
-    // An operation that escapes or modifies the array value.
-    return false;
   }
   return true;
 }
diff --git a/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp b/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp
index 350db1805b13d..477a4512b22db 100644
--- a/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp
+++ b/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp
@@ -445,13 +445,13 @@ recursivelyCollectInteriorUses(ValueBase *DefInst,
       continue;
     }
     // Recursively follow projections.
-    if (auto ProjInst = dyn_cast<SingleValueInstruction>(User)) {
-      ProjectionIndex PI(ProjInst);
+    if (auto *svi = dyn_cast<SingleValueInstruction>(User)) {
+      ProjectionIndex PI(svi);
       if (PI.isValid()) {
         IndexTrieNode *ProjAddrNode = AddressNode;
         bool ProjInteriorAddr = IsInteriorAddress;
-        if (Projection::isAddressProjection(ProjInst)) {
-          if (isa<IndexAddrInst>(ProjInst)) {
+        if (Projection::isAddressProjection(svi)) {
+          if (isa<IndexAddrInst>(svi)) {
             // Don't support indexing within an interior address.
             if (IsInteriorAddress)
               return false;
@@ -466,13 +466,19 @@ recursivelyCollectInteriorUses(ValueBase *DefInst,
           // Don't expect to extract values once we've taken an address.
           return false;
         }
-        if (!recursivelyCollectInteriorUses(ProjInst,
+        if (!recursivelyCollectInteriorUses(svi,
                                             ProjAddrNode->getChild(PI.Index),
                                             ProjInteriorAddr)) {
           return false;
         }
         continue;
       }
+      ArraySemanticsCall AS(svi);
+      if (AS.getKind() == swift::ArrayCallKind::kArrayFinalizeIntrinsic) {
+        if (!recursivelyCollectInteriorUses(svi, AddressNode, IsInteriorAddress))
+          return false;
+        continue;
+      }
     }
     // Otherwise bail.
     LLVM_DEBUG(llvm::dbgs() << "        Found an escaping use: " << *User);
diff --git a/lib/SILOptimizer/Utils/ConstExpr.cpp b/lib/SILOptimizer/Utils/ConstExpr.cpp
index 9716ec28085fb..01a36e1f8907f 100644
--- a/lib/SILOptimizer/Utils/ConstExpr.cpp
+++ b/lib/SILOptimizer/Utils/ConstExpr.cpp
@@ -45,6 +45,8 @@ enum class WellKnownFunction {
   ArrayInitEmpty,
   // Array._allocateUninitializedArray
   AllocateUninitializedArray,
+  // Array._endMutation
+  EndArrayMutation,
   // Array.append(_:)
   ArrayAppendElement,
   // String.init()
@@ -71,6 +73,8 @@ static llvm::Optional<WellKnownFunction> classifyFunction(SILFunction *fn) {
     return WellKnownFunction::ArrayInitEmpty;
   if (fn->hasSemanticsAttr(semantics::ARRAY_UNINITIALIZED_INTRINSIC))
     return WellKnownFunction::AllocateUninitializedArray;
+  if (fn->hasSemanticsAttr(semantics::ARRAY_END_MUTATION))
+    return WellKnownFunction::EndArrayMutation;
   if (fn->hasSemanticsAttr(semantics::ARRAY_APPEND_ELEMENT))
     return WellKnownFunction::ArrayAppendElement;
   if (fn->hasSemanticsAttr(semantics::STRING_INIT_EMPTY))
@@ -946,6 +950,17 @@ ConstExprFunctionState::computeWellKnownCallResult(ApplyInst *apply,
                                                 resultType, allocator));
     return None;
   }
+  case WellKnownFunction::EndArrayMutation: {
+    // This function has the following signature in SIL:
+    //    (@inout Array<Element>) -> ()
+    assert(conventions.getNumParameters() == 1 &&
+           conventions.getNumDirectSILResults() == 0 &&
+           conventions.getNumIndirectSILResults() == 0 &&
+           "unexpected Array._endMutation() signature");
+
+    // _endMutation is a no-op.
+    return None;
+  }
   case WellKnownFunction::ArrayAppendElement: {
     // This function has the following signature in SIL:
     //    (@in Element, @inout Array<Element>) -> ()
diff --git a/test/SILOptimizer/array_count_propagation.sil b/test/SILOptimizer/array_count_propagation.sil
index 4cae662c6a564..1defa16de572b 100644
--- a/test/SILOptimizer/array_count_propagation.sil
+++ b/test/SILOptimizer/array_count_propagation.sil
@@ -29,6 +29,7 @@ sil [_semantics "array.uninitialized"] @adoptStorage : $@convention(thin) (@owne
 sil [_semantics "array.get_count"] @getCount : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyInt
 sil [_semantics "array.get_element"] @getElement : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> @out MyInt
 sil [_semantics "array.uninitialized"] @allocateUninitialized : $@convention(thin) (MyInt, @thin MyArray<MyInt>.Type) -> @owned (MyArray<MyInt>, UnsafeMutablePointer<MyInt>)
+sil [_semantics "array.finalize_intrinsic"] @finalize : $@convention(thin) (@owned MyArray<MyInt>) -> @owned MyArray<MyInt>
 sil [_semantics "array.init"] @initRepeatedValueCount : $@convention(thin) (@in MyInt, MyInt, @thin MyArray<MyInt>.Type) -> @owned MyArray<MyInt>
 sil [_semantics "array.init"] @initEmpty : $@convention(thin) (@thin MyArray<MyInt>.Type) -> @owned MyArray<MyInt>
 
@@ -50,9 +51,11 @@ bb0:
  %7 = tuple_extract %6 : $(MyArray<MyInt>, UnsafeMutablePointer<MyInt>), 0
  %8 = tuple_extract %6 : $(MyArray<MyInt>, UnsafeMutablePointer<MyInt>), 1
  debug_value %7 : $MyArray<MyInt>
+ %f = function_ref @finalize : $@convention(thin) (@owned MyArray<MyInt>) -> @owned MyArray<MyInt>
+ %a = apply %f(%7) : $@convention(thin) (@owned MyArray<MyInt>) -> @owned MyArray<MyInt>
  %9 = function_ref @getCount : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyInt
- %10 = apply %9(%7) : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyInt
- %12 = struct_extract %7 : $MyArray<MyInt>, #MyArray._buffer
+ %10 = apply %9(%a) : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyInt
+ %12 = struct_extract %a : $MyArray<MyInt>, #MyArray._buffer
  %13 = struct_extract %12 : $_MyArrayBuffer<MyInt>, #_MyArrayBuffer._storage
  %14 = struct_extract %13 : $_MyBridgeStorage, #_MyBridgeStorage.rawValue
  strong_release %14 : $Builtin.BridgeObject
diff --git a/test/SILOptimizer/array_element_propagation.sil b/test/SILOptimizer/array_element_propagation.sil
index adf7ac72861a8..136a8c0282fa8 100644
--- a/test/SILOptimizer/array_element_propagation.sil
+++ b/test/SILOptimizer/array_element_propagation.sil
@@ -34,6 +34,7 @@ sil [_semantics "array.get_element"] @getElement2 : $@convention(method) (MyInt,
 sil @unknown_array_use : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyBool
 sil [_semantics "array.uninitialized"] @arrayAdoptStorage : $@convention(thin) (@owned AnyObject, MyInt, @thin Array<MyInt>.Type) -> @owned (Array<MyInt>, UnsafeMutablePointer<MyInt>)
 sil @arrayInit : $@convention(method) (@thin Array<MyInt>.Type) -> @owned Array<MyInt>
+sil [_semantics "array.finalize_intrinsic"] @finalize : $@convention(thin) (@owned MyArray<MyInt>) -> @owned MyArray<MyInt>
 sil [_semantics "array.append_contentsOf"] @arrayAppendContentsOf : $@convention(method) (@owned Array<MyInt>, @inout Array<MyInt>) -> ()
 
 // CHECK-LABEL: sil @propagate01
@@ -88,35 +89,37 @@ sil @propagate01 : $@convention(thin) () -> () {
   %19 = integer_literal $Builtin.Int64, 2
   %20 = struct $MyInt (%19 : $Builtin.Int64)
   store %20 to %18 : $*MyInt
-  %23 = struct_extract %7 : $MyArray<MyInt>, #MyArray._buffer
+  %f = function_ref @finalize : $@convention(thin) (@owned MyArray<MyInt>) -> @owned MyArray<MyInt>
+  %a = apply %f(%7) : $@convention(thin) (@owned MyArray<MyInt>) -> @owned MyArray<MyInt>
+  %23 = struct_extract %a : $MyArray<MyInt>, #MyArray._buffer
   %24 = struct_extract %23 : $_MyArrayBuffer<MyInt>, #_MyArrayBuffer._storage
   %25 = struct_extract %24 : $_MyBridgeStorage, #_MyBridgeStorage.rawValue
   %26 = alloc_stack $MyInt
-  debug_value %7 : $MyArray<MyInt>
+  debug_value %a : $MyArray<MyInt>
   %27 = function_ref @hoistableIsNativeTypeChecked : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyBool
-  %28 = apply %27(%7) : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyBool
+  %28 = apply %27(%a) : $@convention(method) (@guaranteed MyArray<MyInt>) -> MyBool
   debug_value %28 : $MyBool                        // id: %104
   %29 = function_ref @checkSubscript : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
-  %30 = apply %29(%12, %28, %7) : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
+  %30 = apply %29(%12, %28, %a) : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
   debug_value %30 : $_MyDependenceToken
   %31 = function_ref @getElement : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
-  %32 = apply %31(%26, %12, %28, %30, %7) : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
+  %32 = apply %31(%26, %12, %28, %30, %a) : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
   %35 = alloc_stack $MyInt
   debug_value %16 : $MyInt
-  debug_value %7 : $MyArray<MyInt>
+  debug_value %a : $MyArray<MyInt>
   debug_value %28 : $MyBool
   strong_retain %25 : $Builtin.BridgeObject
-  %36 = apply %29(%16, %28, %7) : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
+  %36 = apply %29(%16, %28, %a) : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
   debug_value %36 : $_MyDependenceToken
-  %37 = apply %31(%35, %16, %28, %36, %7) : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
+  %37 = apply %31(%35, %16, %28, %36, %a) : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
   strong_release %25 : $Builtin.BridgeObject
   %44 = alloc_stack $MyInt
-  debug_value %7 : $MyArray<MyInt>
+  debug_value %a : $MyArray<MyInt>
   debug_value %28 : $MyBool
   strong_retain %25 : $Builtin.BridgeObject
-  %45 = apply %29(%20, %28, %7) : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
+  %45 = apply %29(%20, %28, %a) : $@convention(method) (MyInt, MyBool, @guaranteed MyArray<MyInt>) -> _MyDependenceToken
   debug_value %45 : $_MyDependenceToken
-  %46 = apply %31(%44, %20, %28, %45, %7) : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
+  %46 = apply %31(%44, %20, %28, %45, %a) : $@convention(method) (MyInt, MyBool, _MyDependenceToken, @guaranteed MyArray<MyInt>) -> @out MyInt
   strong_release %25 : $Builtin.BridgeObject
   %52 = tuple ()
   dealloc_stack %44 : $*MyInt
diff --git a/test/SILOptimizer/dead_array_elim.sil b/test/SILOptimizer/dead_array_elim.sil
index 620dbb5cec86e..61aceb8d0c06b 100644
--- a/test/SILOptimizer/dead_array_elim.sil
+++ b/test/SILOptimizer/dead_array_elim.sil
@@ -25,6 +25,8 @@ sil [_semantics "array.uninitialized_intrinsic"] @allocArray : $@convention(thin
 
 sil [_semantics "array.uninitialized"] @adoptStorageSpecialiedForInt : $@convention(method) (@guaranteed _ContiguousArrayStorage<Int>, Builtin.Word, @thin Array<Int>.Type) -> (@owned Array<Int>, UnsafeMutablePointer<Int>)
 
+sil [_semantics "array.finalize_intrinsic"] @finalize : $@convention(thin) (@owned Array<Int>) -> @owned Array<Int>
+
 // CHECK-LABEL: sil @deadarrayWithAdoptStorage : $@convention(thin) () -> () {
 // CHECK-NOT: alloc_ref
 // CHECK-NOT: strong_release
@@ -36,10 +38,15 @@ bb0:
   %7 = metatype $@thin Array<Int>.Type
   %8 = function_ref @adoptStorageSpecialiedForInt : $@convention(method) (@guaranteed _ContiguousArrayStorage<Int>, Builtin.Word, @thin Array<Int>.Type) -> (@owned Array<Int>, UnsafeMutablePointer<Int>)
   %9 = apply %8(%6, %0, %7) : $@convention(method) (@guaranteed _ContiguousArrayStorage<Int>, Builtin.Word, @thin Array<Int>.Type) -> (@owned Array<Int>, UnsafeMutablePointer<Int>)
-  strong_release %6 : $_ContiguousArrayStorage<Int>
   %10 = tuple_extract %9 : $(Array<Int>, UnsafeMutablePointer<Int>), 0
   %11 = tuple_extract %9 : $(Array<Int>, UnsafeMutablePointer<Int>), 1
   %12 = struct_extract %11 : $UnsafeMutablePointer<Int>, #UnsafeMutablePointer._rawValue
+  %f = function_ref @finalize : $@convention(thin) (@owned Array<Int>) -> @owned Array<Int>
+  %a = apply %f(%10) : $@convention(thin) (@owned Array<Int>) -> @owned Array<Int>
+  %13 = struct_extract %a : $Array<Int>, #Array._buffer
+  %14 = struct_extract %13 : $_ArrayBuffer<Int>, #_ArrayBuffer._storage
+  %15 = struct_extract %14 : $_BridgeStorage<__ContiguousArrayStorageBase>, #_BridgeStorage.rawValue
+  strong_release %15 : $Builtin.BridgeObject
   %9999 = tuple()
   return %9999 : $()
 }

From 33c8e16ce04300f792c995b7f611d14fe3f4b0a0 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 19:55:30 +0200
Subject: [PATCH 06/10] SIL optimizer: Support begin_cow_mutation and
 end_cow_mutation in some optimizations.

Mostly this is about "looking through" a begin_cow_mutation or end_cow_mutation.
---
 lib/SIL/Utils/InstructionUtils.cpp                  |  6 ++++--
 lib/SIL/Utils/Projection.cpp                        | 11 +++++++----
 lib/SILOptimizer/Analysis/EscapeAnalysis.cpp        |  1 +
 .../SILCombiner/SILCombinerMiscVisitors.cpp         |  1 +
 .../Transforms/RedundantLoadElimination.cpp         |  1 +
 test/SILOptimizer/redundant_load_elim.sil           | 13 +++++++++++++
 6 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/lib/SIL/Utils/InstructionUtils.cpp b/lib/SIL/Utils/InstructionUtils.cpp
index b51e15605b172..9e8ffa340805c 100644
--- a/lib/SIL/Utils/InstructionUtils.cpp
+++ b/lib/SIL/Utils/InstructionUtils.cpp
@@ -128,7 +128,8 @@ SILValue swift::stripCastsWithoutMarkDependence(SILValue V) {
 
     auto K = V->getKind();
     if (isRCIdentityPreservingCast(K) ||
-        K == ValueKind::UncheckedTrivialBitCastInst) {
+        K == ValueKind::UncheckedTrivialBitCastInst ||
+        K == ValueKind::EndCOWMutationInst) {
       V = cast<SingleValueInstruction>(V)->getOperand(0);
       continue;
     }
@@ -308,7 +309,8 @@ bool swift::onlyAffectsRefCount(SILInstruction *user) {
 }
 
 bool swift::mayCheckRefCount(SILInstruction *User) {
-  return isa<IsUniqueInst>(User) || isa<IsEscapingClosureInst>(User);
+  return isa<IsUniqueInst>(User) || isa<IsEscapingClosureInst>(User) ||
+         isa<BeginCOWMutationInst>(User);
 }
 
 bool swift::isSanitizerInstrumentation(SILInstruction *Instruction) {
diff --git a/lib/SIL/Utils/Projection.cpp b/lib/SIL/Utils/Projection.cpp
index 2881469102ea1..849e5ebcd95a9 100644
--- a/lib/SIL/Utils/Projection.cpp
+++ b/lib/SIL/Utils/Projection.cpp
@@ -371,10 +371,13 @@ Optional<ProjectionPath> ProjectionPath::getProjectionPath(SILValue Start,
 
   auto Iter = End;
   while (Start != Iter) {
-    Projection AP(Iter);
-    if (!AP.isValid())
-      break;
-    P.Path.push_back(AP);
+    // end_cow_mutation is not a projection, but we want to "see through" it.
+    if (!isa<EndCOWMutationInst>(Iter)) {
+      Projection AP(Iter);
+      if (!AP.isValid())
+        break;
+      P.Path.push_back(AP);
+    }
     Iter = cast<SingleValueInstruction>(*Iter).getOperand(0);
   }
 
diff --git a/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp b/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
index 5b02943c33a88..cbe9cd0db4448 100644
--- a/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
+++ b/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
@@ -2285,6 +2285,7 @@ void EscapeAnalysis::analyzeInstruction(SILInstruction *I,
     case SILInstructionKind::SelectValueInst:
       analyzeSelectInst(cast<SelectValueInst>(I), ConGraph);
       return;
+    case SILInstructionKind::EndCOWMutationInst:
     case SILInstructionKind::StructInst:
     case SILInstructionKind::TupleInst:
     case SILInstructionKind::EnumInst: {
diff --git a/lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp b/lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp
index 0dcb55ce8ecaf..2d79a39f6d9eb 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp
+++ b/lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp
@@ -699,6 +699,7 @@ static bool isZeroLoadFromEmptyCollection(LoadInst *LI) {
       case ValueKind::UpcastInst:
       case ValueKind::RawPointerToRefInst:
       case ValueKind::AddressToPointerInst:
+      case ValueKind::EndCOWMutationInst:
         addr = cast<SingleValueInstruction>(addr)->getOperand(0);
         break;
       default:
diff --git a/lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp b/lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp
index e7bfef35bbb4e..06968a86b32a5 100644
--- a/lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp
+++ b/lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp
@@ -159,6 +159,7 @@ static bool isRLEInertInstruction(SILInstruction *Inst) {
   case SILInstructionKind::CondFailInst:
   case SILInstructionKind::IsEscapingClosureInst:
   case SILInstructionKind::IsUniqueInst:
+  case SILInstructionKind::EndCOWMutationInst:
   case SILInstructionKind::FixLifetimeInst:
   case SILInstructionKind::EndAccessInst:
   case SILInstructionKind::SetDeallocatingInst:
diff --git a/test/SILOptimizer/redundant_load_elim.sil b/test/SILOptimizer/redundant_load_elim.sil
index 684c7a1890df2..633a72cd588ca 100644
--- a/test/SILOptimizer/redundant_load_elim.sil
+++ b/test/SILOptimizer/redundant_load_elim.sil
@@ -157,6 +157,19 @@ bb0(%0 : $AB):
   return %5 : $Int                               // id: %15
 }
 
+// CHECK-LABEL: sil hidden @load_forward_across_end_cow_mutation
+// CHECK-NOT: = load
+// CHECK: return %1
+sil hidden @load_forward_across_end_cow_mutation : $@convention(thin) (@owned AB, Int) -> Int {
+bb0(%0 : $AB, %1 : $Int):
+  %2 = ref_element_addr %0 : $AB, #AB.value
+  store %1 to %2 : $*Int
+  %4 = end_cow_mutation %0 : $AB
+  %5 = ref_element_addr %4 : $AB, #AB.value // Same field as %2, but addressed through the end_cow_mutation result.
+  %6 = load %5 : $*Int // The CHECK lines expect this load to be replaced by the stored value %1.
+  return %6 : $Int
+}
+
 // CHECK-LABEL: sil hidden @redundant_load_across_fixlifetime_inst
 // CHECK: = load
 // CHECK-NOT: = load

From 16bd756f7beebbd4e6dbff8f5ea031a823074e1d Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 20:10:16 +0200
Subject: [PATCH 07/10] tests: make some tests more robust against optimizer
 changes.

This is in preparation for COW support. More optimizer tests require an optimized non-assert stdlib build.
---
 test/Driver/opt-remark.swift                                | 6 ++++--
 test/IRGen/multithread_module.swift                         | 1 +
 test/IRGen/objc_protocol_extended_method_types.swift        | 3 +++
 .../generic_specialization_loops_detection_with_loops.swift | 2 ++
 test/SILOptimizer/hello-world.swift                         | 2 +-
 test/SILOptimizer/licm_exclusivity.swift                    | 2 +-
 test/SILOptimizer/stack_promotion_array_literal.swift       | 4 ++--
 7 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/test/Driver/opt-remark.swift b/test/Driver/opt-remark.swift
index 0c3978eab4e86..95facf952c5d0 100644
--- a/test/Driver/opt-remark.swift
+++ b/test/Driver/opt-remark.swift
@@ -3,6 +3,8 @@
 // RUN: %target-swiftc_driver -O -Rpass=sil-inliner %s -o %t/throwaway 2>&1 | %FileCheck -check-prefix=REMARK_PASSED %s
 // RUN: %target-swiftc_driver -O -Rpass-missed=sil-inliner %s -o %t/throwaway 2>&1 | %FileCheck -check-prefix=REMARK_MISSED %s
 
+// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib
+
 // DEFAULT-NOT: remark:
 
 func big() {
@@ -39,11 +41,11 @@ func small() {
 
 func foo() {
   // REMARK_MISSED-NOT: remark: {{.*}} inlined
-  // REMARK_MISSED: opt-remark.swift:44:2: remark: Not profitable to inline function "throwaway.big()" (cost = {{.*}}, benefit = {{.*}})
+  // REMARK_MISSED: opt-remark.swift:46:2: remark: Not profitable to inline function "throwaway.big()" (cost = {{.*}}, benefit = {{.*}})
   // REMARK_MISSED-NOT: remark: {{.*}} inlined
 	big()
   // REMARK_PASSED-NOT: remark: Not profitable
-  // REMARK_PASSED: opt-remark.swift:48:3: remark: "throwaway.small()" inlined into "throwaway.foo()" (cost = {{.*}}, benefit = {{.*}})
+  // REMARK_PASSED: opt-remark.swift:50:3: remark: "throwaway.small()" inlined into "throwaway.foo()" (cost = {{.*}}, benefit = {{.*}})
   // REMARK_PASSED-NOT: remark: Not profitable
   small()
 }
diff --git a/test/IRGen/multithread_module.swift b/test/IRGen/multithread_module.swift
index 83eec8c2d3a1a..895e710cc965c 100644
--- a/test/IRGen/multithread_module.swift
+++ b/test/IRGen/multithread_module.swift
@@ -9,6 +9,7 @@
 // RUN: %target-codesign %t/a.out
 // RUN: %target-run %t/a.out | %FileCheck %s
 // REQUIRES: executable_test
+// REQUIRES: optimized_stdlib,swift_stdlib_no_asserts
 
 
 // Test compilation of a module in multi-threaded compilation.
diff --git a/test/IRGen/objc_protocol_extended_method_types.swift b/test/IRGen/objc_protocol_extended_method_types.swift
index 87db4b883d49a..3b23b810e7ec7 100644
--- a/test/IRGen/objc_protocol_extended_method_types.swift
+++ b/test/IRGen/objc_protocol_extended_method_types.swift
@@ -5,6 +5,9 @@
 
 // REQUIRES: OS=macosx
 // REQUIRES: objc_interop
+// TODO: fix test for unoptimized stdlib with asserts
+// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib
+
 
 import Foundation
 
diff --git a/test/SILOptimizer/generic_specialization_loops_detection_with_loops.swift b/test/SILOptimizer/generic_specialization_loops_detection_with_loops.swift
index 30d8f319fbf2a..5c1bf0233dd8d 100644
--- a/test/SILOptimizer/generic_specialization_loops_detection_with_loops.swift
+++ b/test/SILOptimizer/generic_specialization_loops_detection_with_loops.swift
@@ -1,5 +1,7 @@
 // RUN: %target-swift-frontend -O -emit-sil -enforce-exclusivity=unchecked -Xllvm -sil-print-generic-specialization-loops -Xllvm -sil-print-generic-specialization-info %s 2>&1 | %FileCheck --check-prefix=CHECK %s
 
+// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib
+
 // Check that the generic specializer does not hang a compiler by
 // creating and infinite loop of generic specializations.
 
diff --git a/test/SILOptimizer/hello-world.swift b/test/SILOptimizer/hello-world.swift
index 32009c597ec85..29a29f8681db2 100644
--- a/test/SILOptimizer/hello-world.swift
+++ b/test/SILOptimizer/hello-world.swift
@@ -1,6 +1,6 @@
 // RUN: rm -rf %t && mkdir -p %t/stats
 // RUN: %target-swift-frontend -emit-sil -stats-output-dir %t/stats %s -o /dev/null
 // RUN: %{python} %utils/process-stats-dir.py --evaluate 'NumSILGenFunctions < 10' %t/stats
-// RUN: %{python} %utils/process-stats-dir.py --evaluate 'NumSILOptFunctions < 10' %t/stats
+// RUN: %{python} %utils/process-stats-dir.py --evaluate 'NumSILOptFunctions < 20' %t/stats
 
 print("Hello world")
diff --git a/test/SILOptimizer/licm_exclusivity.swift b/test/SILOptimizer/licm_exclusivity.swift
index 1354f4caf93a7..7ada653d6745e 100644
--- a/test/SILOptimizer/licm_exclusivity.swift
+++ b/test/SILOptimizer/licm_exclusivity.swift
@@ -4,7 +4,7 @@
 // RUN: %target-swift-frontend -O -enforce-exclusivity=checked -emit-sil -Xllvm -debug-only=sil-licm -whole-module-optimization %s 2>&1 | %FileCheck %s --check-prefix=TESTLICMWMO
 // RUN: %target-swift-frontend -O -enforce-exclusivity=checked -emit-sil  -whole-module-optimization %s | %FileCheck %s --check-prefix=TESTSILWMO
 
-// REQUIRES: optimized_stdlib,asserts
+// REQUIRES: optimized_stdlib,asserts,swift_stdlib_no_asserts
 // REQUIRES: PTRSIZE=64
 
 // TESTLICM-LABEL: Processing loops in {{.*}}run_ReversedArray{{.*}}
diff --git a/test/SILOptimizer/stack_promotion_array_literal.swift b/test/SILOptimizer/stack_promotion_array_literal.swift
index 1f6b4e2c2a35b..3e5d3c890905b 100644
--- a/test/SILOptimizer/stack_promotion_array_literal.swift
+++ b/test/SILOptimizer/stack_promotion_array_literal.swift
@@ -7,12 +7,12 @@
 // CHECK-LABEL: sil @{{.*}}testit
 // CHECK:  alloc_ref [stack] [tail_elems
 
-public func testit(_ N: Int) {
+public func testit(_ N: Int, _ x: Int) {
   for _ in 0..<N {
     for _ in 0..<10 {
        var nums = [Int]()
        for _ in 0..<40_000 {
-         nums += [1, 2, 3, 4, 5, 6, 7]
+         nums += [1, 2, 3, 4, 5, 6, x]
        }
     }
   }

From 99f4d854292e22213330c1eca36c5ce0bcdab42b Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 20:41:12 +0200
Subject: [PATCH 08/10] ArraySemantics: support "array.check_subscript" as a
 two-parameter function.

Support a version of Array._checkSubscript which has no wasNativeTypeChecked parameter.
---
 lib/SILOptimizer/Analysis/ArraySemantic.cpp | 63 +++++++++++----------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/lib/SILOptimizer/Analysis/ArraySemantic.cpp b/lib/SILOptimizer/Analysis/ArraySemantic.cpp
index 59ba1ba17a87f..abfd4510a4cbe 100644
--- a/lib/SILOptimizer/Analysis/ArraySemantic.cpp
+++ b/lib/SILOptimizer/Analysis/ArraySemantic.cpp
@@ -103,10 +103,12 @@ bool swift::ArraySemanticsCall::isValidSignature() {
   }
   case ArrayCallKind::kCheckSubscript: {
     // Int, Bool, Self
-    if (SemanticsCall->getNumArguments() != 3 ||
-        !SemanticsCall->getArgument(0)->getType().isTrivial(*F))
+    unsigned numArgs = SemanticsCall->getNumArguments();
+    if (numArgs != 2 && numArgs != 3)
+      return false;
+    if (!SemanticsCall->getArgument(0)->getType().isTrivial(*F))
       return false;
-    if (!SemanticsCall->getArgument(1)->getType().isTrivial(*F))
+    if (numArgs == 3 && !SemanticsCall->getArgument(1)->getType().isTrivial(*F))
       return false;
     auto SelfConvention = FnTy->getSelfParameter().getConvention();
     return SelfConvention == ParameterConvention::Direct_Guaranteed ||
@@ -326,23 +328,22 @@ bool swift::ArraySemanticsCall::canHoist(SILInstruction *InsertBefore,
     // Not implemented yet.
     return false;
 
-  case ArrayCallKind::kCheckSubscript: {
-    auto IsNativeArg = getArrayPropertyIsNativeTypeChecked();
-    ArraySemanticsCall IsNative(IsNativeArg,
-                                "array.props.isNativeTypeChecked", true);
-    if (!IsNative) {
-      // Do we have a constant parameter?
-      auto *SI = dyn_cast<StructInst>(IsNativeArg);
-      if (!SI)
-        return false;
-      if (!isa<IntegerLiteralInst>(SI->getOperand(0)))
+  case ArrayCallKind::kCheckSubscript:
+    if (SILValue IsNativeArg = getArrayPropertyIsNativeTypeChecked()) {
+      ArraySemanticsCall IsNative(IsNativeArg,
+                                  "array.props.isNativeTypeChecked", true);
+      if (!IsNative) {
+        // Do we have a constant parameter?
+        auto *SI = dyn_cast<StructInst>(IsNativeArg);
+        if (!SI)
+          return false;
+        if (!isa<IntegerLiteralInst>(SI->getOperand(0)))
+          return false;
+      } else if (!IsNative.canHoist(InsertBefore, DT))
+        // Otherwise, we must be able to hoist the function call.
         return false;
-    } else if (!IsNative.canHoist(InsertBefore, DT))
-      // Otherwise, we must be able to hoist the function call.
-      return false;
-
+    }
     return canHoistArrayArgument(SemanticsCall, getSelf(), InsertBefore, DT);
-  }
 
   case ArrayCallKind::kMakeMutable:
   case ArrayCallKind::kEndMutation:
@@ -450,9 +451,8 @@ ApplyInst *swift::ArraySemanticsCall::hoistOrCopy(SILInstruction *InsertBefore,
         hoistOrCopySelf(SemanticsCall, InsertBefore, DT, LeaveOriginal);
 
     SILValue NewArrayProps;
-    if (Kind == ArrayCallKind::kCheckSubscript) {
+    if (SILValue IsNativeArg = getArrayPropertyIsNativeTypeChecked()) {
       // Copy the array.props argument call.
-      auto IsNativeArg = getArrayPropertyIsNativeTypeChecked();
       ArraySemanticsCall IsNative(IsNativeArg,
                                   "array.props.isNativeTypeChecked", true);
       if (!IsNative) {
@@ -517,14 +517,15 @@ void swift::ArraySemanticsCall::removeCall() {
 
   switch (getKind()) {
   default: break;
-  case ArrayCallKind::kCheckSubscript: {
-    // Remove all uses with the empty tuple ().
-    auto EmptyDep = SILBuilderWithScope(SemanticsCall)
-                        .createStruct(SemanticsCall->getLoc(),
-                                      SemanticsCall->getType(), {});
-    SemanticsCall->replaceAllUsesWith(EmptyDep);
-  }
-  break;
+  case ArrayCallKind::kCheckSubscript:
+    if (!SemanticsCall->getType().isVoid()){
+      // Replace all uses with an empty struct value.
+      auto EmptyDep = SILBuilderWithScope(SemanticsCall)
+                          .createStruct(SemanticsCall->getLoc(),
+                                        SemanticsCall->getType(), {});
+      SemanticsCall->replaceAllUsesWith(EmptyDep);
+    }
+    break;
   case ArrayCallKind::kGetElement: {
     // Remove the matching isNativeTypeChecked and check_subscript call.
     ArraySemanticsCall IsNative(getTypeCheckedArgument(),
@@ -554,11 +555,13 @@ SILValue
 swift::ArraySemanticsCall::getArrayPropertyIsNativeTypeChecked() const {
   switch (getKind()) {
     case ArrayCallKind::kCheckSubscript:
-      return SemanticsCall->getArgument(1);
+      if (SemanticsCall->getNumArguments() == 3)
+        return SemanticsCall->getArgument(1);
+      return SILValue();
     case ArrayCallKind::kGetElement:
       return getTypeCheckedArgument();
     default:
-      llvm_unreachable("Must have an array.props argument");
+      return SILValue();
   }
 }
 

From 216eec2d211d5d5c541405070eea86d9aaeb0601 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 20:43:19 +0200
Subject: [PATCH 09/10] SIL optimizer: add an additional LICM pass to the
 pipeline.

The COWOpts optimization relies more on LICM. This additional run of the pass ensures that there is no phase-ordering issue between LICM and COWOpts.
---
 lib/SILOptimizer/PassManager/PassPipeline.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp
index 259024fe49f22..a51a9123aee06 100644
--- a/lib/SILOptimizer/PassManager/PassPipeline.cpp
+++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp
@@ -515,6 +515,7 @@ static void addMidLevelFunctionPipeline(SILPassPipelinePlan &P) {
   P.addLICM();
   // Run loop unrolling after inlining and constant propagation, because loop
   // trip counts may have became constant.
+  P.addLICM();
   P.addLoopUnroll();
 }
 

From 3f42ad704c3c7f5b575d845438a0509747602048 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 25 May 2020 21:07:24 +0200
Subject: [PATCH 10/10] DeadObjectElimination: don't let fix_lifetime prevent
 dead array elimination

---
 lib/SILOptimizer/Transforms/DeadObjectElimination.cpp | 3 ++-
 test/SILOptimizer/dead_array_elim.sil                 | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp b/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp
index 477a4512b22db..fff5fafdde1f7 100644
--- a/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp
+++ b/lib/SILOptimizer/Transforms/DeadObjectElimination.cpp
@@ -412,7 +412,8 @@ recursivelyCollectInteriorUses(ValueBase *DefInst,
 
     // Lifetime endpoints that don't allow the address to escape.
     if (isa<RefCountingInst>(User) ||
-        isa<DebugValueInst>(User)) {
+        isa<DebugValueInst>(User) ||
+        isa<FixLifetimeInst>(User)) {
       AllUsers.insert(User);
       continue;
     }
diff --git a/test/SILOptimizer/dead_array_elim.sil b/test/SILOptimizer/dead_array_elim.sil
index 61aceb8d0c06b..5d05d554ab50b 100644
--- a/test/SILOptimizer/dead_array_elim.sil
+++ b/test/SILOptimizer/dead_array_elim.sil
@@ -43,6 +43,7 @@ bb0:
   %12 = struct_extract %11 : $UnsafeMutablePointer<Int>, #UnsafeMutablePointer._rawValue
   %f = function_ref @finalize : $@convention(thin) (@owned Array<Int>) -> @owned Array<Int>
   %a = apply %f(%10) : $@convention(thin) (@owned Array<Int>) -> @owned Array<Int>
+  fix_lifetime %a : $Array<Int>
   %13 = struct_extract %a : $Array<Int>, #Array._buffer
   %14 = struct_extract %13 : $_ArrayBuffer<Int>, #_ArrayBuffer._storage
   %15 = struct_extract %14 : $_BridgeStorage<__ContiguousArrayStorageBase>, #_BridgeStorage.rawValue