[SPARK-10708] Consolidate sort shuffle implementations #8829
Changes from all commits: BypassMergeSortShuffleWriter.java
@@ -21,21 +21,30 @@
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.annotation.Nullable;

import scala.None$;
import scala.Option;
import scala.Product2;
import scala.Tuple2;
import scala.collection.Iterator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.Closeables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.spark.Partitioner;
import org.apache.spark.ShuffleDependency;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.executor.ShuffleWriteMetrics;
import org.apache.spark.scheduler.MapStatus;
import org.apache.spark.scheduler.MapStatus$;
import org.apache.spark.serializer.Serializer;
import org.apache.spark.serializer.SerializerInstance;
import org.apache.spark.shuffle.IndexShuffleBlockResolver;
import org.apache.spark.shuffle.ShuffleWriter;
import org.apache.spark.storage.*;
import org.apache.spark.util.Utils;
@@ -62,7 +71,7 @@
 * <p>
 * There have been proposals to completely remove this code path; see SPARK-6026 for details.
 */
- final class BypassMergeSortShuffleWriter<K, V> implements SortShuffleFileWriter<K, V> {
+ final class BypassMergeSortShuffleWriter<K, V> extends ShuffleWriter<K, V> {

  private final Logger logger = LoggerFactory.getLogger(BypassMergeSortShuffleWriter.class);
@@ -72,31 +81,52 @@ final class BypassMergeSortShuffleWriter<K, V> implements SortShuffleFileWriter<
  private final BlockManager blockManager;
  private final Partitioner partitioner;
  private final ShuffleWriteMetrics writeMetrics;
  private final int shuffleId;
  private final int mapId;
  private final Serializer serializer;
  private final IndexShuffleBlockResolver shuffleBlockResolver;

  /** Array of file writers, one for each partition */
  private DiskBlockObjectWriter[] partitionWriters;
  @Nullable private MapStatus mapStatus;
  private long[] partitionLengths;

  /**
   * Are we in the process of stopping? Because map tasks can call stop() with success = true
   * and then call stop() with success = false if they get an exception, we want to make sure
   * we don't try deleting files, etc twice.
   */
  private boolean stopping = false;

  public BypassMergeSortShuffleWriter(
-     SparkConf conf,
      BlockManager blockManager,
-     Partitioner partitioner,
-     ShuffleWriteMetrics writeMetrics,
-     Serializer serializer) {
+     IndexShuffleBlockResolver shuffleBlockResolver,
+     BypassMergeSortShuffleHandle<K, V> handle,
+     int mapId,
+     TaskContext taskContext,
+     SparkConf conf) {
    // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided
    this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024;
    this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true);
-   this.numPartitions = partitioner.numPartitions();
    this.blockManager = blockManager;
-   this.partitioner = partitioner;
-   this.writeMetrics = writeMetrics;
-   this.serializer = serializer;
+   final ShuffleDependency<K, V, V> dep = handle.dependency();
+   this.mapId = mapId;
+   this.shuffleId = dep.shuffleId();
+   this.partitioner = dep.partitioner();
+   this.numPartitions = partitioner.numPartitions();
+   this.writeMetrics = new ShuffleWriteMetrics();
+   taskContext.taskMetrics().shuffleWriteMetrics_$eq(Option.apply(writeMetrics));
+   this.serializer = Serializer.getSerializer(dep.serializer());
+   this.shuffleBlockResolver = shuffleBlockResolver;
  }

  @Override
- public void insertAll(Iterator<Product2<K, V>> records) throws IOException {
+ public void write(Iterator<Product2<K, V>> records) throws IOException {
    assert (partitionWriters == null);
    if (!records.hasNext()) {
+     partitionLengths = new long[numPartitions];
+     shuffleBlockResolver.writeIndexFile(shuffleId, mapId, partitionLengths);
+     mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
      return;
    }
    final SerializerInstance serInstance = serializer.newInstance();
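As a side note on the spark.shuffle.file.buffer line in the constructor above: getSizeAsKb parses the setting with units (a bare number is read as kibibytes, which is the backwards compatibility the comment refers to) and returns kibibytes, which the writer then multiplies by 1024 to get bytes. A small worked example, assuming a user-supplied value:

// Worked example of the buffer-size arithmetic (illustration only, not part of the patch).
// Default "32k": getSizeAsKb returns 32, so fileBufferSize = 32 * 1024 = 32768 bytes.
// If a user sets spark.shuffle.file.buffer=1m: getSizeAsKb returns 1024, so 1024 * 1024 = 1048576 bytes.
int fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024;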
@@ -124,13 +154,24 @@ public void insertAll(Iterator<Product2<K, V>> records) throws IOException {
    for (DiskBlockObjectWriter writer : partitionWriters) {
      writer.commitAndClose();
    }
+   partitionLengths =
+     writePartitionedFile(shuffleBlockResolver.getDataFile(shuffleId, mapId));
+   shuffleBlockResolver.writeIndexFile(shuffleId, mapId, partitionLengths);
+   mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
  }

- @Override
- public long[] writePartitionedFile(
-     BlockId blockId,
-     TaskContext context,
-     File outputFile) throws IOException {
+ @VisibleForTesting
+ long[] getPartitionLengths() {
+   return partitionLengths;
+ }

+ /**
+  * Concatenate all of the per-partition files into a single combined file.
+  *
+  * @return array of lengths, in bytes, of each partition of the file (used by map output tracker).
+  */
+ private long[] writePartitionedFile(File outputFile) throws IOException {
Contributor
Can you add inline doc explaining what the long[] is (in particular, whether it holds offsets or lengths)?

Contributor (Author)
Copied and adapted the comment from the regular SortShuffleWriter:

/**
 * Concatenate all of the per-partition files into a single combined file.
 *
 * @return array of lengths, in bytes, of each partition of the file (used by map output tracker)
 */
    // Track location of the partition starts in the output file
    final long[] lengths = new long[numPartitions];
    if (partitionWriters == null) {
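To make the review question above (offsets versus lengths) concrete: the array returned by writePartitionedFile holds per-partition lengths in bytes, and byte offsets into the combined data file can be recovered as a running sum, which is essentially what the shuffle index file stores. A small illustrative snippet, not part of this patch:

// Illustration only (not from this patch): deriving partition offsets from the
// per-partition lengths that writePartitionedFile returns.
long[] lengths = {10L, 0L, 25L, 5L};           // bytes written for partitions 0..3
long[] offsets = new long[lengths.length + 1]; // offsets[i] = start of partition i in the data file
for (int i = 0; i < lengths.length; i++) {
  offsets[i + 1] = offsets[i] + lengths[i];    // running sum; offsets[lengths.length] = total file size
}
// Partition i occupies bytes [offsets[i], offsets[i + 1]) of the combined output file.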
@@ -165,18 +206,33 @@ public long[] writePartitionedFile(
  }

  @Override
- public void stop() throws IOException {
-   if (partitionWriters != null) {
-     try {
-       for (DiskBlockObjectWriter writer : partitionWriters) {
-         // This method explicitly does _not_ throw exceptions:
-         File file = writer.revertPartialWritesAndClose();
-         if (!file.delete()) {
-           logger.error("Error while deleting file {}", file.getAbsolutePath());
-         }
-       }
-     } finally {
-       partitionWriters = null;
-     }
-   }
- }
+ public Option<MapStatus> stop(boolean success) {
+   if (stopping) {
+     return None$.empty();
+   } else {
+     stopping = true;
+     if (success) {
+       if (mapStatus == null) {
+         throw new IllegalStateException("Cannot call stop(true) without having called write()");
+       }
+       return Option.apply(mapStatus);
+     } else {
+       // The map task failed, so delete our output data.
+       if (partitionWriters != null) {
+         try {
+           for (DiskBlockObjectWriter writer : partitionWriters) {
+             // This method explicitly does _not_ throw exceptions:
+             File file = writer.revertPartialWritesAndClose();
+             if (!file.delete()) {
+               logger.error("Error while deleting file {}", file.getAbsolutePath());
+             }
+           }
+         } finally {
+           partitionWriters = null;
+         }
+       }
+       shuffleBlockResolver.removeDataByMap(shuffleId, mapId);
+       return None$.empty();
+     }
+   }
+ }
Contributor
Should this be stopping, or should we just make stop() itself idempotent (i.e. isStopped)?

Author
Good question. This was more or less copied from somewhere else and was originally written by someone else, so I'll have to do some sleuthing to figure out why we need it.
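For context on the question above, a minimal sketch (mine, not from the patch) of the call pattern the stopping flag guards against: the task may call stop(true) after a successful write and then stop(false) if a later exception occurs, so cleanup must run at most once. Making stop() idempotent, as suggested, could look roughly like this; the names here (IdempotentStopSketch, doCleanup, stopped) are illustrative and not part of the Spark code.

// Hypothetical sketch of an idempotent stop(); not the actual Spark implementation.
final class IdempotentStopSketch {
  private boolean stopped = false;  // the "isStopped"-style guard suggested in the review

  /** Safe to call any number of times; cleanup runs at most once. */
  public void stop(boolean success) {
    if (stopped) {
      return;                       // e.g. stop(false) arriving after stop(true) is a no-op
    }
    stopped = true;
    if (!success) {
      doCleanup();                  // delete partial map output exactly once
    }
  }

  private void doCleanup() {
    // delete temporary files, release writers, etc.
  }
}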