
Commit e541d64

Author: Andrew Or
Track aggregation memory for both sort and hash
The previous commit only tracked it for hash-based aggregation; we should track it in both cases.
1 parent 0be3a42 commit e541d64
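
To make the accounting concrete, here is a minimal, self-contained Scala sketch of the pattern this commit applies (PeakTracking, ToyMap, and the sizes below are illustrative stand-ins, not Spark classes): each structure keeps its own high-water mark, refreshes it whenever the peak is read, and snapshots it one last time just before it frees its memory, so the peak stays available after teardown. The aggregation iterator can then report the larger of the map's and the sorter's peaks, since the two structures never hold memory at the same time.

// Illustrative sketch only, not Spark code.
trait PeakTracking {
  private var peak = 0L
  // Current memory consumption, supplied by the concrete structure.
  def currentMemoryBytes: Long
  // Refresh the high-water mark; called on every read and before freeing memory.
  protected def updatePeak(): Unit = { peak = math.max(peak, currentMemoryBytes) }
  def peakMemoryUsedBytes: Long = { updatePeak(); peak }
}

// A toy map whose pages are dropped on free(), loosely mirroring BytesToBytesMap.
class ToyMap extends PeakTracking {
  private var pages = Vector.empty[Array[Byte]]
  def currentMemoryBytes: Long = pages.map(_.length.toLong).sum
  def insert(recordSize: Int): Unit = pages :+= new Array[Byte](recordSize)
  def free(): Unit = { updatePeak(); pages = Vector.empty } // snapshot before releasing
}

object PeakTrackingSketch extends App {
  val map = new ToyMap
  (1 to 4).foreach(_ => map.insert(1 << 20)) // grow to roughly 4 MB
  map.free()
  println(map.peakMemoryUsedBytes) // still reports ~4 MB after free()
}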

File tree (5 files changed: +66 -20 lines)

  core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
  core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
  sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java
  sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java
  sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala

core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 18 additions & 1 deletion
@@ -166,6 +166,8 @@ public final class BytesToBytesMap {
 
   private long numHashCollisions = 0;
 
+  private long peakMemoryUsedBytes = 0L;
+
   public BytesToBytesMap(
       TaskMemoryManager taskMemoryManager,
       ShuffleMemoryManager shuffleMemoryManager,
@@ -658,6 +660,7 @@ private void allocate(int capacity) {
    * This method is idempotent and can be called multiple times.
    */
   public void free() {
+    updatePeakMemoryUsed();
     longArray = null;
     bitset = null;
     Iterator<MemoryBlock> dataPagesIterator = dataPages.iterator();
@@ -684,7 +687,6 @@ public long getPageSizeBytes() {
 
   /**
    * Returns the total amount of memory, in bytes, consumed by this map's managed structures.
-   * Note that this is also the peak memory used by this map, since the map is append-only.
    */
   public long getTotalMemoryConsumption() {
     long totalDataPagesSize = 0L;
@@ -694,6 +696,21 @@ public long getTotalMemoryConsumption() {
     return totalDataPagesSize + bitset.memoryBlock().size() + longArray.memoryBlock().size();
   }
 
+  private void updatePeakMemoryUsed() {
+    long mem = getTotalMemoryConsumption();
+    if (mem > peakMemoryUsedBytes) {
+      peakMemoryUsedBytes = mem;
+    }
+  }
+
+  /**
+   * Return the peak memory used so far, in bytes.
+   */
+  public long getPeakMemoryUsedBytes() {
+    updatePeakMemoryUsed();
+    return peakMemoryUsedBytes;
+  }
+
   /**
    * Returns the total amount of time spent resizing this map (in nanoseconds).
   */
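
Note that free() now calls updatePeakMemoryUsed() before it drops longArray, bitset, and the data pages. Snapshotting first is what keeps the high-water mark from being lost when the map releases its memory, and it is what lets the reworked test below assert that freeing the map leaves the reported peak unchanged.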

core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java

Lines changed: 13 additions & 7 deletions
@@ -525,7 +525,7 @@ public void resizingLargeMap() {
   }
 
   @Test
-  public void testTotalMemoryConsumption() {
+  public void testPeakMemoryUsed() {
     final long recordLengthBytes = 24;
     final long pageSizeBytes = 256 + 8; // 8 bytes for end-of-page marker
     final long numRecordsPerPage = (pageSizeBytes - 8) / recordLengthBytes;
@@ -536,8 +536,8 @@ public void testTotalMemoryConsumption() {
     // monotonically increasing. More specifically, every time we allocate a new page it
     // should increase by exactly the size of the page. In this regard, the memory usage
     // at any given time is also the peak memory used.
-    long previousMemory = map.getTotalMemoryConsumption();
-    long newMemory;
+    long previousPeakMemory = map.getPeakMemoryUsedBytes();
+    long newPeakMemory;
     try {
       for (long i = 0; i < numRecordsPerPage * 10; i++) {
         final long[] value = new long[]{i};
@@ -548,15 +548,21 @@ public void testTotalMemoryConsumption() {
           value,
           PlatformDependent.LONG_ARRAY_OFFSET,
           8);
-        newMemory = map.getTotalMemoryConsumption();
+        newPeakMemory = map.getPeakMemoryUsedBytes();
         if (i % numRecordsPerPage == 0) {
           // We allocated a new page for this record, so peak memory should change
-          assertEquals(previousMemory + pageSizeBytes, newMemory);
+          assertEquals(previousPeakMemory + pageSizeBytes, newPeakMemory);
         } else {
-          assertEquals(previousMemory, newMemory);
+          assertEquals(previousPeakMemory, newPeakMemory);
         }
-        previousMemory = newMemory;
+        previousPeakMemory = newPeakMemory;
       }
+
+      // Freeing the map should not change the peak memory
+      map.free();
+      newPeakMemory = map.getPeakMemoryUsedBytes();
+      assertEquals(previousPeakMemory, newPeakMemory);
+
     } finally {
       map.free();
     }

sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java

Lines changed: 3 additions & 4 deletions
@@ -210,11 +210,10 @@ public void close() {
   }
 
   /**
-   * The memory used by this map's managed structures, in bytes.
-   * Note that this is also the peak memory used by this map, since the map is append-only.
+   * Return the peak memory used so far, in bytes.
    */
-  public long getMemoryUsage() {
-    return map.getTotalMemoryConsumption();
+  public long getPeakMemoryUsedBytes() {
+    return map.getPeakMemoryUsedBytes();
   }
 
   /**

sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java

Lines changed: 7 additions & 0 deletions
@@ -159,6 +159,13 @@ public KVSorterIterator sortedIterator() throws IOException {
     }
   }
 
+  /**
+   * Return the peak memory used so far, in bytes.
+   */
+  public long getPeakMemoryUsedBytes() {
+    return sorter.getPeakMemoryUsedBytes();
+  }
+
   /**
    * Marks the current page as no-more-space-available, and as a result, either allocate a
    * new page or spill when we see the next record.

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala

Lines changed: 25 additions & 8 deletions
@@ -397,14 +397,20 @@ class TungstenAggregationIterator(
   private[this] var mapIteratorHasNext: Boolean = false
 
   ///////////////////////////////////////////////////////////////////////////
-  // Part 4: The function used to switch this iterator from hash-based
-  // aggregation to sort-based aggregation.
+  // Part 3: Methods and fields used by sort-based aggregation.
   ///////////////////////////////////////////////////////////////////////////
 
+  // This sorter is used for sort-based aggregation. It is initialized as soon as
+  // we switch from hash-based to sort-based aggregation. Otherwise, it is not used.
+  private[this] var externalSorter: UnsafeKVExternalSorter = null
+
+  /**
+   * Switch to sort-based aggregation when the hash-based approach is unable to acquire memory.
+   */
   private def switchToSortBasedAggregation(firstKey: UnsafeRow, firstInput: UnsafeRow): Unit = {
     logInfo("falling back to sort based aggregation.")
     // Step 1: Get the ExternalSorter containing sorted entries of the map.
-    val externalSorter: UnsafeKVExternalSorter = hashMap.destructAndCreateExternalSorter()
+    externalSorter = hashMap.destructAndCreateExternalSorter()
 
     // Step 2: Free the memory used by the map.
     hashMap.free()
@@ -601,7 +607,7 @@ class TungstenAggregationIterator(
   }
 
   ///////////////////////////////////////////////////////////////////////////
-  // Par 7: Iterator's public methods.
+  // Part 7: Iterator's public methods.
   ///////////////////////////////////////////////////////////////////////////
 
   override final def hasNext: Boolean = {
@@ -610,7 +616,7 @@ class TungstenAggregationIterator(
 
   override final def next(): UnsafeRow = {
     if (hasNext) {
-      if (sortBased) {
+      val res = if (sortBased) {
         // Process the current group.
         processCurrentSortedGroup()
         // Generate output row for the current group.
@@ -633,9 +639,6 @@ class TungstenAggregationIterator(
         if (!mapIteratorHasNext) {
           // If there is no input from aggregationBufferMapIterator, we copy current result.
           val resultCopy = result.copy()
-          // Report memory usage metrics.
-          TaskContext.get().internalMetricsToAccumulators(
-            InternalAccumulator.PEAK_EXECUTION_MEMORY).add(hashMap.getMemoryUsage)
           // Then, we free the map.
           hashMap.free()
 
@@ -644,6 +647,19 @@ class TungstenAggregationIterator(
           result
         }
       }
+
+      // If this is the last record, update the task's peak memory usage. Since we destroy
+      // the map to create the sorter, their memory usages should not overlap, so it is safe
+      // to just use the max of the two.
+      if (!hasNext) {
+        val mapMemory = hashMap.getPeakMemoryUsedBytes
+        val sorterMemory = Option(externalSorter).map(_.getPeakMemoryUsedBytes).getOrElse(0L)
+        val peakMemory = Math.max(mapMemory, sorterMemory)
+        TaskContext.get().internalMetricsToAccumulators(
+          InternalAccumulator.PEAK_EXECUTION_MEMORY).add(peakMemory)
+      }
+
+      res
     } else {
       // no more result
       throw new NoSuchElementException
@@ -654,6 +670,7 @@ class TungstenAggregationIterator(
   // Part 8: A utility function used to generate a output row when there is no
   // input and there is no grouping expression.
   ///////////////////////////////////////////////////////////////////////////
+
   def outputForEmptyGroupingKeyWithoutInput(): UnsafeRow = {
     if (groupingExpressions.isEmpty) {
       sortBasedAggregationBuffer.copyFrom(initialAggregationBuffer)
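
A quick numeric check of the "max of the two" reasoning in the new block above (sizes made up for illustration): if the hash map peaked at 64 MB and was destroyed before the sorter was created, and the sorter later peaked at 80 MB, the task never held more than 80 MB of aggregation memory at once. Reporting max(64 MB, 80 MB) = 80 MB to PEAK_EXECUTION_MEMORY is accurate, whereas summing to 144 MB would double-count memory that was never held simultaneously.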
