[SPARK-32917][SHUFFLE][CORE] Adds support for executors to push shuffle blocks after successful map task completion #30312
Changes from all commits
```diff
@@ -2023,4 +2023,33 @@ package object config {
       .version("3.1.0")
       .doubleConf
       .createWithDefault(5)
+
+  private[spark] val SHUFFLE_NUM_PUSH_THREADS =
+    ConfigBuilder("spark.shuffle.push.numPushThreads")
+      .doc("Specify the number of threads in the block pusher pool. These threads assist " +
+        "in creating connections and pushing blocks to remote shuffle services. By default, the " +
+        "threadpool size is equal to the number of spark executor cores.")
+      .version("3.2.0")
+      .intConf
+      .createOptional
+
+  private[spark] val SHUFFLE_MAX_BLOCK_SIZE_TO_PUSH =
+    ConfigBuilder("spark.shuffle.push.maxBlockSizeToPush")
+      .doc("The max size of an individual block to push to the remote shuffle services. Blocks " +
+        "larger than this threshold are not pushed to be merged remotely. These shuffle blocks " +
+        "will be fetched by the executors in the original manner.")
+      .version("3.2.0")
+      .bytesConf(ByteUnit.BYTE)
+      .createWithDefaultString("1m")
+
+  private[spark] val SHUFFLE_MAX_BLOCK_BATCH_SIZE_FOR_PUSH =
+    ConfigBuilder("spark.shuffle.push.maxBlockBatchSize")
+      .doc("The max size of a batch of shuffle blocks to be grouped into a single push request.")
+      .version("3.2.0")
+      .bytesConf(ByteUnit.BYTE)
+      // Default is 3m because it is greater than 2m which is the default value for
+      // TransportConf#memoryMapBytes. If this defaults to 2m as well it is very likely that each
+      // batch of block will be loaded in memory with memory mapping, which has higher overhead
+      // with small MB sized chunk of data.
+      .createWithDefaultString("3m")
 }
```
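For reference, a minimal sketch of how an application might tune these three options. The `spark.shuffle.push.enabled` flag and the driver skeleton are assumptions taken from the wider push-based shuffle work (SPARK-30602), not from this diff; the three `spark.shuffle.push.*` tuning keys below are the ones added here.

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Sketch only: tuning the push-based shuffle options added in this change.
// Assumes the cluster runs an external shuffle service that supports block
// merging and that the master is supplied via spark-submit.
object PushShuffleTuningExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("push-shuffle-tuning")
      // Feature flag from the broader push-based shuffle work (assumption,
      // not part of this diff).
      .set("spark.shuffle.push.enabled", "true")
      // Size the block pusher pool explicitly instead of defaulting to the
      // number of executor cores.
      .set("spark.shuffle.push.numPushThreads", "4")
      // Blocks larger than this are not pushed for remote merging; they are
      // fetched by executors in the original manner.
      .set("spark.shuffle.push.maxBlockSizeToPush", "1m")
      // Cap on the bytes grouped into a single push request; kept above the
      // 2m default of TransportConf#memoryMapBytes so small batches are not
      // memory-mapped.
      .set("spark.shuffle.push.maxBlockBatchSize", "3m")

    val sc = new SparkContext(conf)
    // A shuffle-heavy stage whose map outputs would be pushed and merged.
    val counts = sc
      .parallelize(1 to 1000000)
      .map(i => (i % 1000, 1))
      .reduceByKey(_ + _)
      .count()
    println(s"distinct keys: $counts")
    sc.stop()
  }
}
```

Note the interaction between the last two settings: `maxBlockSizeToPush` filters which individual blocks are eligible for pushing, while `maxBlockBatchSize` bounds how many of those eligible blocks are coalesced into one outgoing request.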