@@ -27,7 +27,7 @@ object GroupedIterator {
2727 keyExpressions : Seq [Expression ],
2828 inputSchema : Seq [Attribute ]): Iterator [(InternalRow , Iterator [InternalRow ])] = {
2929 if (input.hasNext) {
30- new GroupedIterator (input, keyExpressions, inputSchema)
30+ new GroupedIterator (input.buffered , keyExpressions, inputSchema)
3131 } else {
3232 Iterator .empty
3333 }
@@ -64,7 +64,7 @@ object GroupedIterator {
6464 * @param inputSchema The schema of the rows in the `input` iterator.
6565 */
6666class GroupedIterator private (
67- input : Iterator [InternalRow ],
67+ input : BufferedIterator [InternalRow ],
6868 groupingExpressions : Seq [Expression ],
6969 inputSchema : Seq [Attribute ])
7070 extends Iterator [(InternalRow , Iterator [InternalRow ])] {
@@ -83,11 +83,12 @@ class GroupedIterator private(
8383
8484 /** Holds a copy of an input row that is in the current group. */
8585 var currentGroup = currentRow.copy()
86- var currentIterator : Iterator [ InternalRow ] = null
86+
8787 assert(keyOrdering.compare(currentGroup, currentRow) == 0 )
88+ var currentIterator = createGroupValuesIterator()
8889
8990 // Return true if we already have the next iterator or fetching a new iterator is successful.
90- def hasNext : Boolean = currentIterator != null || fetchNextGroupIterator
91+ def hasNext : Boolean = currentIterator.ne( null ) || fetchNextGroupIterator
9192
9293 def next (): (InternalRow , Iterator [InternalRow ]) = {
9394 assert(hasNext) // Ensure we have fetched the next iterator.
@@ -96,46 +97,64 @@ class GroupedIterator private(
9697 ret
9798 }
9899
99- def fetchNextGroupIterator (): Boolean = {
100- if (currentRow != null || input.hasNext) {
101- val inputIterator = new Iterator [InternalRow ] {
102- // Return true if we have a row and it is in the current group, or if fetching a new row is
103- // successful.
104- def hasNext = {
105- (currentRow != null && keyOrdering.compare(currentGroup, currentRow) == 0 ) ||
106- fetchNextRowInGroup()
107- }
100+ private def fetchNextGroupIterator (): Boolean = {
101+ assert(currentIterator eq null )
102+
103+ if (currentRow.eq(null ) && input.hasNext) {
104+ currentRow = input.next()
105+ }
106+
107+ if (currentRow eq null ) {
108+ // There is no data left, return false.
109+ false
110+ } else {
111+ // Skip to next group.
112+ while (input.hasNext && keyOrdering.compare(currentGroup, currentRow) == 0 ) {
113+ currentRow = input.next()
114+ }
115+
116+ if (keyOrdering.compare(currentGroup, currentRow) == 0 ) {
117+ // There are no more groups, return false.
118+ false
119+ } else {
120+ // Now `currentRow` is the first row of the next group.
121+ currentGroup = currentRow.copy()
122+ currentIterator = createGroupValuesIterator()
123+ true
124+ }
125+ }
126+ }
127+
128+ private def createGroupValuesIterator (): Iterator [InternalRow ] = {
129+ new Iterator [InternalRow ] {
130+ def hasNext : Boolean = currentRow != null || fetchNextRowInGroup()
131+
132+ def next (): InternalRow = {
133+ assert(hasNext)
134+ val res = currentRow
135+ currentRow = null
136+ res
137+ }
108138
109- def fetchNextRowInGroup (): Boolean = {
110- if (currentRow != null || input.hasNext) {
139+ private def fetchNextRowInGroup (): Boolean = {
140+ assert(currentRow eq null )
141+
142+ if (input.hasNext) {
143+ // The inner iterator must NOT consume input that belongs to the next group, so we use `head`
144+ // to peek at the next row and decide whether it should be processed here.
145+ if (keyOrdering.compare(currentGroup, input.head) == 0 ) {
146+ // Next input is in the current group. Continue the inner iterator.
111147 currentRow = input.next()
112- if (keyOrdering.compare(currentGroup, currentRow) == 0 ) {
113- // The row is in the current group. Continue the inner iterator.
114- true
115- } else {
116- // We got a row, but its not in the right group. End this inner iterator and prepare
117- // for the next group.
118- currentIterator = null
119- currentGroup = currentRow.copy()
120- false
121- }
148+ true
122149 } else {
123- // There is no more input so we are done .
150+ // Next input is not in the right group. End this inner iterator .
124151 false
125152 }
126- }
127-
128- def next (): InternalRow = {
129- assert(hasNext) // Ensure we have fetched the next row.
130- val res = currentRow
131- currentRow = null
132- res
153+ } else {
154+ // There is no more data, return false.
155+ false
133156 }
134157 }
135- currentIterator = inputIterator
136- true
137- } else {
138- false
139158 }
140159 }
141160}
0 commit comments