From 9627a05e005277752492f57cd0fed33de2a739b8 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 5 Jul 2019 17:31:31 -0700
Subject: [PATCH] Document EscapeAnalysis.

Describe the algorithm in the file-level doc comment. The basic
algorithm in the referenced paper is similar, but the most
interesting/important information is how it is adapted to SIL.
---
 .../SILOptimizer/Analysis/EscapeAnalysis.h    | 169 ++++++++++++++++--
 lib/SILOptimizer/Analysis/EscapeAnalysis.cpp  |   5 +
 2 files changed, 162 insertions(+), 12 deletions(-)

diff --git a/include/swift/SILOptimizer/Analysis/EscapeAnalysis.h b/include/swift/SILOptimizer/Analysis/EscapeAnalysis.h
index addaf68763610..e7ba4e63f4d36 100644
--- a/include/swift/SILOptimizer/Analysis/EscapeAnalysis.h
+++ b/include/swift/SILOptimizer/Analysis/EscapeAnalysis.h
@@ -9,6 +9,140 @@
 // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//
+///
+/// EscapeAnalysis provides information about whether the lifetime of an object
+/// exceeds the scope of a function.
+///
+/// We compute escape analysis by building a connection graph for each
+/// function. For interprocedural analysis the connection graphs are merged
+/// in bottom-up order of the call graph.
+/// The idea is based on "Escape analysis for Java." by J.-D. Choi, M. Gupta, M.
+/// Serrano, V. C. Sreedhar, and S. Midkiff
+/// http://dx.doi.org/10.1145/320384.320386
+///
+/// This design is customized for SIL and the Swift memory model as follows:
+///
+/// Each SILValue holding a memory address or object reference is mapped to a
+/// node in the connection graph. The node's type depends on the value's
+/// origin. SILArguments have "argument" type. Locally allocated storage and
+/// values of unknown origin have "value" type. Loaded values have "content"
+/// type. A "return" type node represents the returned value and has no
+/// associated SILValue.
+///
+/// "Content" nodes are special in that they represent the identity of some set
+/// of memory locations. Content nodes are created to represent the memory
+/// pointed to by one of the other node types. So, except for loads, SILValues
+/// do not directly map to content nodes. For debugging purposes only, content
+/// nodes do refer back to the SILValue that originally pointed to them. When
+/// content nodes are merged, only one of those SILValue back-references is
+/// arbitrarily preserved. The content of the returned value is the only content
+/// node that has no back-reference to a SILValue.
+///
+/// This code:
+///   let a = SomeClass()
+///   return a
+///
+/// Generates the following connection graph, where 'a' is in the SILValue %0:
+///   Val %0 Esc: R, Succ: (%0.1) // Represents 'a', and points to 'a's content
+///   Con %0.1 Esc: G, Succ:      // Represents the content of 'a'
+///   Ret  Esc: R, Succ: %0       // The returned value, aliased with 'a'
+///
+/// Each node has an escaping state: None, (R)eturn, (A)rguments, or (G)lobal.
+/// These states form a lattice in which None is the most refined, or top, state
+/// and Global is the least refined, or bottom, state. Merging nodes performs a
+/// meet operation on their escaping states. At a call site, the callee graph is
+/// merged with the caller graph by merging the respective call argument
+/// nodes. A node has a "Return" escaping state if it only escapes by being
+/// returned from the current function. A node has an "Argument" escaping state
+/// if it only escapes by being passed as an incoming argument to this function.
+///
+/// A directed edge between two connection graph nodes indicates that the memory
+/// represented by the destination node is reachable via an address
+/// contained in the source node. A node may only have one "pointsTo" edge,
+/// whose destination is always a content node. Additional "defer" edges allow a
+/// form of aliasing between nodes. A single content node represents any and all
+/// memory that a given node may point to. This content node can be found by
+/// following any path of defer edges until the path terminates in a pointsTo
+/// edge. The final pointsTo edge refers to the representative content node, and
+/// all such paths in the graph must reach the same content node. To maintain
+/// this invariant, the algorithm that builds the connection graph must
+/// incrementally merge content nodes.
+///
+/// Note that a defer edge may occur between any node types. A value node that
+/// holds a reference may defer to another value or content node whose value was
+/// merged via a phi; a content node that holds a reference may defer to a value
+/// node that was stored into the content; a content node may defer to another
+/// content node that was loaded and stored.
+///
+/// Now consider the same example, but declaring a 'var' instead of a 'let':
+///
+///   var a = SomeClass()
+///   return a
+///
+/// Generates the following connection graph, where the alloc_stack for variable
+/// 'a' is in the SILValue %0 and class allocation returns SILValue %3.
+///   Val %0 Esc: G, Succ: (%0.1)
+///   Con %0.1 Esc: G, Succ: %3
+///   Val %3 Esc: G, Succ: (%3.1)
+///   Con %3.1 Esc: G, Succ:
+///   Ret  Esc: R, Succ: %3
+///
+/// The value node for variable 'a' now points to local variable storage
+/// (%0.1). That local variable storage contains a reference. Assignment into
+/// that reference creates a defer edge to the allocated reference (%3). The
+/// allocated reference in turn points to the object storage (%3.1).
+///
+/// Note that a variable holding a single class reference and a variable
+/// holding a non-trivial struct have the same graph representation. The
+/// variable's content node only represents the value of the references, not the
+/// memory pointed-to by the reference.
+///
+/// A pointsTo edge does not necessarily indicate pointer indirection. It may
+/// simply represent a derived address within the same object. This allows
+/// escape analysis to view an object's memory in layers, each with separate
+/// escaping properties. For example, a class object's first-level content node
+/// represents the object header including the metadata pointer and reference
+/// count. An object's second-level content node only represents the
+/// reference-holding fields within that object. Consider the connection graph
+/// for a class with properties:
+///
+///   class HasObj {
+///     var obj: AnyObject
+///   }
+///   func assignProperty(h: HasObj, o: AnyObject) {
+///     h.obj = o
+///   }
+///
+/// Which generates this graph where the argument 'h' is %0, and 'o' is %1:
+///   Arg %0 Esc: A, Succ: (%0.1)
+///   Con %0.1 Esc: A, Succ: (%0.2)
+///   Con %0.2 Esc: A, Succ: %1
+///   Arg %1 Esc: A, Succ: (%1.1)
+///   Con %1.1 Esc: A, Succ: (%1.2)
+///   Con %1.2 Esc: G, Succ:
+///
+/// Node %0.1 represents the header of 'h', including reference count and
+/// metadata pointer. This node points to %0.2 which represents the 'obj'
+/// property. The assignment 'h.obj = o' creates a defer edge from %0.2 to
+/// %1. Similarly, %1.1 represents the header of 'o', and %1.2 represents any
+/// potential nontrivial properties in 'o' which may have escaped globally when
+/// 'o' was released.
+///
+/// The connection graph is constructed by summarizing all memory operations in
+/// a flow-insensitive way. Hint: ConGraph->viewCG() displays the Dot-formatted
+/// connection graph.
+///
+/// In addition to the connection graph, EscapeAnalysis stores information about
+/// "use points". Each release operation is a use point. These instructions are
+/// recorded in a table and given an ID. Each connection graph node stores a
+/// bitset indicating the use points reachable via the CFG by that node. This
+/// provides some flow-sensitive information on top of the otherwise
+/// flow-insensitive connection graph.
+///
+/// Note: storing bitsets in each node may be unnecessary overhead since the
+/// same information can be obtained with a graph traversal, typically of only
+/// 1-3 hops.
+//===----------------------------------------------------------------------===//
 
 #ifndef SWIFT_SILOPTIMIZER_ANALYSIS_ESCAPEANALYSIS_H_
 #define SWIFT_SILOPTIMIZER_ANALYSIS_ESCAPEANALYSIS_H_
@@ -28,15 +162,9 @@ namespace swift {
 
 class BasicCalleeAnalysis;
 
-/// The EscapeAnalysis provides information if the lifetime of an object exceeds
-/// the scope of a function.
-///
-/// We compute the escape analysis by building a connection graph for each
-/// function. For the interprocedural analysis the connection graphs are merged
-/// in bottom-up order of the call graph.
-/// The idea is based on "Escape analysis for Java." by J.-D. Choi, M. Gupta, M.
-/// Serrano, V. C. Sreedhar, and S. Midkiff
-/// http://dx.doi.org/10.1145/320384.320386
+/// The EscapeAnalysis results for functions in the current module, computed
+/// bottom-up in the call graph. Each function with valid EscapeAnalysis
+/// information is associated with a ConnectionGraph.
 class EscapeAnalysis : public BottomUpIPAnalysis {
 
   /// The types of edges in the connection graph.
@@ -169,8 +297,21 @@ class EscapeAnalysis : public BottomUpIPAnalysis {
     NodeType Type;
     
     /// The constructor.
-    CGNode(ValueBase *V, NodeType Type) :
-        V(V), UsePoints(0), Type(Type) { }
+    CGNode(ValueBase *V, NodeType Type) : V(V), UsePoints(0), Type(Type) {
+      switch (Type) {
+      case NodeType::Argument:
+      case NodeType::Value:
+        assert(V); // Argument and value nodes always map to a SILValue.
+        break;
+      case NodeType::Return:
+        assert(!V); // The return node never has an associated SILValue.
+        break;
+      case NodeType::Content:
+        // V may be null here: a content node representing the returned value
+        // has no associated SILValue.
+        break;
+      }
+    }
 
     /// Merges the state from another state and returns true if it changed.
     bool mergeEscapeState(EscapeState OtherState) {
@@ -452,7 +593,11 @@ class EscapeAnalysis : public BottomUpIPAnalysis {
     /// Returns null, if V is not a "pointer".
     CGNode *getNode(ValueBase *V, EscapeAnalysis *EA, bool createIfNeeded = true);
 
-    /// Gets or creates a content node to which \a AddrNode points to.
+    /// Gets or creates a content node to which \a AddrNode points during
+    /// initial graph construction. This may not be called after defer edges
+    /// have been created. Doing so would break the invariant that all
+    /// non-content nodes ultimately have a pointsTo edge to a single content
+    /// node.
     CGNode *getContentNode(CGNode *AddrNode);
 
     /// Get or creates a pseudo node for the function return value.
diff --git a/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp b/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
index 707926afece43..19d33eddec597 100644
--- a/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
+++ b/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
@@ -1413,8 +1413,13 @@ void EscapeAnalysis::analyzeInstruction(SILInstruction *I,
         // the object itself (because it will be a dangling pointer after
         // deallocation).
         CGNode *CapturedByDeinit = ConGraph->getContentNode(AddrNode);
+        // Get the content node for the object's properties. The object header
+        // itself cannot escape from the deinit.
         CapturedByDeinit = ConGraph->getContentNode(CapturedByDeinit);
         if (deinitIsKnownToNotCapture(OpV)) {
+          // Presumably this is necessary because, even though the deinit
+          // doesn't escape the immediate properties of this class, it may
+          // indirectly escape some other memory content(?)
           CapturedByDeinit = ConGraph->getContentNode(CapturedByDeinit);
         }
         ConGraph->setEscapesGlobal(CapturedByDeinit);