-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-9747] [SQL] Avoid starving an unsafe operator in aggregation #8038
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
27f2e7f
Reserve memory in advance in TungstenAggregate
6549654
Actually request the memory in constructor + add tests
995be3d
Merge branch 'master' of github.com:apache/spark into unsafe-starve-m…
ca1b44c
Minor: Update comment
355a9bd
Merge branch 'master' of github.com:apache/spark into unsafe-starve-m…
2cf088a
Merge branch 'master' of github.com:apache/spark into unsafe-starve-m…
4d416d0
Do not acquire a page if creating sorter destructively
70b8500
Merge branch 'master' of github.com:apache/spark into unsafe-starve-m…
b4d3633
Address comments
b10a4f3
Fix test
94ca5de
Merge branch 'master' of github.com:apache/spark into unsafe-starve-m…
d4dc9ca
Fix tests
19f2e1b
Fix tests again
7ebf6b9
Merge branch 'master' of github.com:apache/spark into unsafe-starve-m…
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,8 +72,6 @@ import org.apache.spark.sql.types.StructType | |
| * the function used to create mutable projections. | ||
| * @param originalInputAttributes | ||
| * attributes of representing input rows from `inputIter`. | ||
| * @param inputIter | ||
| * the iterator containing input [[UnsafeRow]]s. | ||
| */ | ||
| class TungstenAggregationIterator( | ||
| groupingExpressions: Seq[NamedExpression], | ||
|
|
@@ -83,12 +81,14 @@ class TungstenAggregationIterator( | |
| resultExpressions: Seq[NamedExpression], | ||
| newMutableProjection: (Seq[Expression], Seq[Attribute]) => (() => MutableProjection), | ||
| originalInputAttributes: Seq[Attribute], | ||
| inputIter: Iterator[InternalRow], | ||
| testFallbackStartsAt: Option[Int], | ||
| numInputRows: LongSQLMetric, | ||
| numOutputRows: LongSQLMetric) | ||
| extends Iterator[UnsafeRow] with Logging { | ||
|
|
||
| // The parent partition iterator, to be initialized later in `start` | ||
| private[this] var inputIter: Iterator[InternalRow] = null | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////// | ||
| // Part 1: Initializing aggregate functions. | ||
| /////////////////////////////////////////////////////////////////////////// | ||
|
|
@@ -348,11 +348,15 @@ class TungstenAggregationIterator( | |
| false // disable tracking of performance metrics | ||
| ) | ||
|
|
||
| // Exposed for testing | ||
| private[aggregate] def getHashMap: UnsafeFixedWidthAggregationMap = hashMap | ||
|
|
||
| // The function used to read and process input rows. When processing input rows, | ||
| // it first uses hash-based aggregation by putting groups and their buffers in | ||
| // hashMap. If we could not allocate more memory for the map, we switch to | ||
| // sort-based aggregation (by calling switchToSortBasedAggregation). | ||
| private def processInputs(): Unit = { | ||
| assert(inputIter != null, "attempted to process input when iterator was null") | ||
| while (!sortBased && inputIter.hasNext) { | ||
| val newInput = inputIter.next() | ||
| numInputRows += 1 | ||
|
|
@@ -372,6 +376,7 @@ class TungstenAggregationIterator( | |
| // that it switch to sort-based aggregation after `fallbackStartsAt` input rows have | ||
| // been processed. | ||
| private def processInputsWithControlledFallback(fallbackStartsAt: Int): Unit = { | ||
| assert(inputIter != null, "attempted to process input when iterator was null") | ||
| var i = 0 | ||
| while (!sortBased && inputIter.hasNext) { | ||
| val newInput = inputIter.next() | ||
|
|
@@ -412,6 +417,7 @@ class TungstenAggregationIterator( | |
| * Switch to sort-based aggregation when the hash-based approach is unable to acquire memory. | ||
| */ | ||
| private def switchToSortBasedAggregation(firstKey: UnsafeRow, firstInput: InternalRow): Unit = { | ||
| assert(inputIter != null, "attempted to process input when iterator was null") | ||
| logInfo("falling back to sort based aggregation.") | ||
| // Step 1: Get the ExternalSorter containing sorted entries of the map. | ||
| externalSorter = hashMap.destructAndCreateExternalSorter() | ||
|
|
@@ -431,6 +437,11 @@ class TungstenAggregationIterator( | |
| case _ => false | ||
| } | ||
|
|
||
| // Note: Since we spill the sorter's contents immediately after creating it, we must insert | ||
| // something into the sorter here to ensure that we acquire at least a page of memory. | ||
| // This is done through `externalSorter.insertKV`, which will trigger the page allocation. | ||
| // Otherwise, children operators may steal the window of opportunity and starve our sorter. | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about we explicitly say that
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| if (needsProcess) { | ||
| // First, we create a buffer. | ||
| val buffer = createNewAggregationBuffer() | ||
|
|
@@ -588,27 +599,33 @@ class TungstenAggregationIterator( | |
| // have not switched to sort-based aggregation. | ||
| /////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| // Starts to process input rows. | ||
| testFallbackStartsAt match { | ||
| case None => | ||
| processInputs() | ||
| case Some(fallbackStartsAt) => | ||
| // This is the testing path. processInputsWithControlledFallback is same as processInputs | ||
| // except that it switches to sort-based aggregation after `fallbackStartsAt` input rows | ||
| // have been processed. | ||
| processInputsWithControlledFallback(fallbackStartsAt) | ||
| } | ||
| /** | ||
| * Start processing input rows. | ||
| * Only after this method is called will this iterator be non-empty. | ||
| */ | ||
| def start(parentIter: Iterator[InternalRow]): Unit = { | ||
| inputIter = parentIter | ||
| testFallbackStartsAt match { | ||
| case None => | ||
| processInputs() | ||
| case Some(fallbackStartsAt) => | ||
| // This is the testing path. processInputsWithControlledFallback is same as processInputs | ||
| // except that it switches to sort-based aggregation after `fallbackStartsAt` input rows | ||
| // have been processed. | ||
| processInputsWithControlledFallback(fallbackStartsAt) | ||
| } | ||
|
|
||
| // If we did not switch to sort-based aggregation in processInputs, | ||
| // we pre-load the first key-value pair from the map (to make hasNext idempotent). | ||
| if (!sortBased) { | ||
| // First, set aggregationBufferMapIterator. | ||
| aggregationBufferMapIterator = hashMap.iterator() | ||
| // Pre-load the first key-value pair from the aggregationBufferMapIterator. | ||
| mapIteratorHasNext = aggregationBufferMapIterator.next() | ||
| // If the map is empty, we just free it. | ||
| if (!mapIteratorHasNext) { | ||
| hashMap.free() | ||
| // If we did not switch to sort-based aggregation in processInputs, | ||
| // we pre-load the first key-value pair from the map (to make hasNext idempotent). | ||
| if (!sortBased) { | ||
| // First, set aggregationBufferMapIterator. | ||
| aggregationBufferMapIterator = hashMap.iterator() | ||
| // Pre-load the first key-value pair from the aggregationBufferMapIterator. | ||
| mapIteratorHasNext = aggregationBufferMapIterator.next() | ||
| // If the map is empty, we just free it. | ||
| if (!mapIteratorHasNext) { | ||
| hashMap.free() | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -673,21 +690,20 @@ class TungstenAggregationIterator( | |
| } | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////// | ||
| // Part 8: A utility function used to generate a output row when there is no | ||
| // input and there is no grouping expression. | ||
| // Part 8: Utility functions | ||
| /////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| /** | ||
| * Generate a output row when there is no input and there is no grouping expression. | ||
| */ | ||
| def outputForEmptyGroupingKeyWithoutInput(): UnsafeRow = { | ||
| if (groupingExpressions.isEmpty) { | ||
| sortBasedAggregationBuffer.copyFrom(initialAggregationBuffer) | ||
| // We create a output row and copy it. So, we can free the map. | ||
| val resultCopy = | ||
| generateOutput(UnsafeRow.createFromByteArray(0, 0), sortBasedAggregationBuffer).copy() | ||
| hashMap.free() | ||
| resultCopy | ||
| } else { | ||
| throw new IllegalStateException( | ||
| "This method should not be called when groupingExpressions is not empty.") | ||
| } | ||
| assert(groupingExpressions.isEmpty) | ||
| assert(inputIter == null) | ||
| generateOutput(UnsafeRow.createFromByteArray(0, 0), initialAggregationBuffer) | ||
| } | ||
|
|
||
| /** Free memory used in the underlying map. */ | ||
| def free(): Unit = { | ||
| hashMap.free() | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we just return `resultRdd`? Seems we do not need the cast?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, the result RDD is of type `RDD[UnsafeRow]`. Since RDDs are not covariant, I think we do need the cast.