
Commit f7c620c

Address a number of minor review comments
1 parent 71d67fe commit f7c620c

File tree

4 files changed: +22 / -11 lines


core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java

Lines changed: 11 additions & 5 deletions
@@ -21,12 +21,15 @@
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import javax.annotation.Nullable;
 
+import scala.None$;
 import scala.Option;
 import scala.Product2;
 import scala.Tuple2;
 import scala.collection.Iterator;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.io.Closeables;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -45,8 +48,6 @@
 import org.apache.spark.storage.*;
 import org.apache.spark.util.Utils;
 
-import javax.annotation.Nullable;
-
 /**
  * This class implements sort-based shuffle's hash-style shuffle fallback path. This write path
  * writes incoming records to separate files, one file per reduce partition, then concatenates these
@@ -160,11 +161,16 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
     mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
   }
 
-  // Exposed for testing
+  @VisibleForTesting
   long[] getPartitionLengths() {
     return partitionLengths;
   }
 
+  /**
+   * Concatenate all of the per-partition files into a single combined file.
+   *
+   * @return array of lengths, in bytes, of each partition of the file (used by map output tracker).
+   */
   private long[] writePartitionedFile(File outputFile) throws IOException {
     // Track location of the partition starts in the output file
     final long[] lengths = new long[numPartitions];
@@ -202,7 +208,7 @@ private long[] writePartitionedFile(File outputFile) throws IOException {
   @Override
   public Option<MapStatus> stop(boolean success) {
     if (stopping) {
-      return Option.apply(null);
+      return None$.empty();
     } else {
       stopping = true;
       if (success) {
@@ -226,7 +232,7 @@ public Option<MapStatus> stop(boolean success) {
           }
         }
        shuffleBlockResolver.removeDataByMap(shuffleId, mapId);
-       return Option.apply(null);
+       return None$.empty();
      }
    }
  }
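The substantive changes in this file are the explicit None return (None$.empty() instead of Option.apply(null), which also evaluates to None but obscures the intent) and Guava's @VisibleForTesting annotation replacing the ad-hoc "// Exposed for testing" comment. Below is a minimal, self-contained sketch of the same Java-to-Scala Option interop and annotation usage; the class and member names are illustrative only (not the actual Spark sources), and it assumes scala-library and Guava are on the classpath.

import scala.None$;
import scala.Option;

import com.google.common.annotations.VisibleForTesting;

// Illustrative stand-in for a writer-style class; not the actual Spark code.
public class OptionInteropSketch {

  private boolean stopping = false;
  private String result = null;

  public Option<String> stop(boolean success) {
    if (stopping || !success) {
      // Explicitly "no result"; equivalent to Option.apply(null), but self-documenting.
      return None$.empty();
    }
    stopping = true;
    // Option.apply wraps null as None and non-null values as Some.
    return Option.apply(result);
  }

  // Package-private accessor; the annotation replaces an ad-hoc "exposed for testing" comment
  // and lets static analysis flag callers outside of tests.
  @VisibleForTesting
  boolean isStopping() {
    return stopping;
  }

  public static void main(String[] args) {
    System.out.println(new OptionInteropSketch().stop(false)); // prints "None"
  }
}

Either form hands Scala callers a None; the explicit None$.empty() simply reads the way the Option<MapStatus> signature intends.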

core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala

Lines changed: 7 additions & 2 deletions
@@ -74,7 +74,11 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
       " Shuffle will continue to spill to disk when necessary.")
   }
 
+  /**
+   * A mapping from shuffle ids to the number of mappers producing output for those shuffles.
+   */
   private[this] val numMapsForShuffle = new ConcurrentHashMap[Int, Int]()
+
   override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf)
 
   /**
@@ -168,8 +172,9 @@ private[spark] object SortShuffleManager extends Logging {
 
   /**
    * The maximum number of shuffle output partitions that SortShuffleManager supports when
-   *
-   */
+   * buffering map outputs in a serialized form. This is an extreme defensive programming measure,
+   * since it's extremely unlikely that a single shuffle produces over 16 million output partitions.
+   * */
   val MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE =
     PackedRecordPointer.MAXIMUM_PARTITION_ID + 1
 
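The rewritten Scaladoc pins the cap to PackedRecordPointer.MAXIMUM_PARTITION_ID + 1 and calls it "over 16 million" partitions. A quick arithmetic check of that figure, assuming the partition id occupies 24 bits of the packed 64-bit record pointer (the bit width is an assumption here, not something this diff states):

// Sanity check of the "over 16 million output partitions" figure,
// assuming a 24-bit partition-id field in the packed record pointer.
public class PartitionLimitCheck {
  public static void main(String[] args) {
    final int assumedPartitionIdBits = 24;
    final int maximumPartitionId = (1 << assumedPartitionIdBits) - 1;   // 16777215
    final int maxPartitionsForSerializedMode = maximumPartitionId + 1;  // 16777216
    System.out.println("MAXIMUM_PARTITION_ID        = " + maximumPartitionId);
    System.out.println("max partitions (serialized) = " + maxPartitionsForSerializedMode);
  }
}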

core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,6 @@
 import java.nio.ByteBuffer;
 import java.util.*;
 
-import org.apache.spark.shuffle.sort.SerializedShuffleHandle;
 import scala.*;
 import scala.collection.Iterator;
 import scala.runtime.AbstractFunction1;
@@ -56,6 +55,7 @@
 import org.apache.spark.scheduler.MapStatus;
 import org.apache.spark.shuffle.IndexShuffleBlockResolver;
 import org.apache.spark.shuffle.ShuffleMemoryManager;
+import org.apache.spark.shuffle.sort.SerializedShuffleHandle;
 import org.apache.spark.storage.*;
 import org.apache.spark.unsafe.memory.ExecutorMemoryManager;
 import org.apache.spark.unsafe.memory.MemoryAllocator;
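The only change in this test file moves an import to where the file's grouping expects it: java/javax first, then scala, then third-party libraries, then org.apache.spark, each group alphabetized and separated by a blank line. That grouping is my reading of the surrounding imports rather than something the diff spells out; under it, SerializedShuffleHandle belongs in the final block, as the result reads after the change:

// Import grouping as it reads after this change (names taken from the diff above).
import java.nio.ByteBuffer;
import java.util.*;

import scala.*;
import scala.collection.Iterator;
import scala.runtime.AbstractFunction1;

import org.apache.spark.scheduler.MapStatus;
import org.apache.spark.shuffle.IndexShuffleBlockResolver;
import org.apache.spark.shuffle.ShuffleMemoryManager;
import org.apache.spark.shuffle.sort.SerializedShuffleHandle;
import org.apache.spark.storage.*;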

core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala

Lines changed: 3 additions & 3 deletions
@@ -122,7 +122,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte
       conf
     )
     writer.write(Iterator.empty)
-    writer.stop(true)
+    writer.stop( /* success = */ true)
     assert(writer.getPartitionLengths.sum === 0)
     assert(outputFile.exists())
     assert(outputFile.length() === 0)
@@ -146,7 +146,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte
       conf
     )
     writer.write(records)
-    writer.stop(true)
+    writer.stop( /* success = */ true)
     assert(temporaryFilesCreated.nonEmpty)
     assert(writer.getPartitionLengths.sum === outputFile.length())
     assert(temporaryFilesCreated.count(_.exists()) === 0) // check that temporary files were deleted
@@ -175,7 +175,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte
       }))
     }
     assert(temporaryFilesCreated.nonEmpty)
-    writer.stop(false)
+    writer.stop( /* success = */ false)
     assert(temporaryFilesCreated.count(_.exists()) === 0)
   }
 
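These test changes only annotate the boolean argument at each call site, so writer.stop(true) and writer.stop(false) read without a trip to the method signature. A minimal Java illustration of the same call-site commenting convention (the method and messages are invented for the example):

// Minimal illustration of naming a boolean argument at the call site.
public class BooleanArgComment {

  // Illustrative method: at a bare call site like stop(true), the flag's meaning is invisible.
  static void stop(boolean success) {
    System.out.println(success ? "committing output" : "discarding output");
  }

  public static void main(String[] args) {
    stop( /* success = */ true);   // reads as "stop, treating the attempt as successful"
    stop( /* success = */ false);  // reads as "stop, discarding this attempt's output"
  }
}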
