
Conversation

@maropu
Member

@maropu maropu commented Aug 30, 2017

What changes were proposed in this pull request?

This pr proposes to split the aggregation code into pieces in HashAggregateExec. In #18810, we saw a performance regression when the JVM does not JIT-compile functions that are too long (the limit is 8000 bytes of bytecode). I checked and found that the code generated by HashAggregateExec frequently goes over this limit, for example:

scala> spark.range(1).selectExpr("id % 1024 AS a", "id AS b").write.saveAsTable("t")
scala> sql("SELECT a, KURTOSIS(b)FROM t GROUP BY a")

This query goes over the limit: the actual bytecode size is 12356.
This pr splits the aggregation code into small separate functions. For a simple example:

scala> sql("SELECT SUM(a), AVG(a) FROM VALUES(1) t(a)").debugCodegen
  • generated code with this pr:
/* 083 */   private void agg_doAggregateWithoutKey() throws java.io.IOException {
/* 084 */     // initialize aggregation buffer
/* 085 */     final long agg_value = -1L;
/* 086 */     agg_bufIsNull = true;
/* 087 */     agg_bufValue = agg_value;
/* 088 */     boolean agg_isNull1 = false;
/* 089 */     double agg_value1 = -1.0;
/* 090 */     if (!false) {
/* 091 */       agg_value1 = (double) 0;
/* 092 */     }
/* 093 */     agg_bufIsNull1 = agg_isNull1;
/* 094 */     agg_bufValue1 = agg_value1;
/* 095 */     agg_bufIsNull2 = false;
/* 096 */     agg_bufValue2 = 0L;
/* 097 */
/* 098 */     while (inputadapter_input.hasNext() && !stopEarly()) {
/* 099 */       InternalRow inputadapter_row = (InternalRow) inputadapter_input.next();
/* 100 */       boolean inputadapter_isNull = inputadapter_row.isNullAt(0);
/* 101 */       long inputadapter_value = inputadapter_isNull ? -1L : (inputadapter_row.getLong(0));
/* 102 */       boolean inputadapter_isNull1 = inputadapter_row.isNullAt(1);
/* 103 */       double inputadapter_value1 = inputadapter_isNull1 ? -1.0 : (inputadapter_row.getDouble(1));
/* 104 */       boolean inputadapter_isNull2 = inputadapter_row.isNullAt(2);
/* 105 */       long inputadapter_value2 = inputadapter_isNull2 ? -1L : (inputadapter_row.getLong(2));
/* 106 */
/* 107 */       // do aggregate
/* 108 */       // copy aggregation buffer to the local
/* 109 */       boolean agg_localBufIsNull = agg_bufIsNull;
/* 110 */       long agg_localBufValue = agg_bufValue;
/* 111 */       boolean agg_localBufIsNull1 = agg_bufIsNull1;
/* 112 */       double agg_localBufValue1 = agg_bufValue1;
/* 113 */       boolean agg_localBufIsNull2 = agg_bufIsNull2;
/* 114 */       long agg_localBufValue2 = agg_bufValue2;
/* 115 */       // common sub-expressions
/* 116 */
/* 117 */       // process aggregate functions to update aggregation buffer
/* 118 */       agg_doAggregateVal_coalesce(agg_localBufIsNull, agg_localBufValue, inputadapter_value, inputadapter_isNull);
/* 119 */       agg_doAggregateVal_add(agg_localBufValue1, inputadapter_isNull1, inputadapter_value1, agg_localBufIsNull1);
/* 120 */       agg_doAggregateVal_add1(inputadapter_isNull2, inputadapter_value2, agg_localBufIsNull2, agg_localBufValue2);
/* 121 */       if (shouldStop()) return;
/* 122 */     }
  • generated code in the current master:
/* 083 */   private void agg_doAggregateWithoutKey() throws java.io.IOException {
/* 084 */     // initialize aggregation buffer
/* 085 */     final long agg_value = -1L;
/* 086 */     agg_bufIsNull = true;
/* 087 */     agg_bufValue = agg_value;
/* 088 */     boolean agg_isNull1 = false;
/* 089 */     double agg_value1 = -1.0;
/* 090 */     if (!false) {
/* 091 */       agg_value1 = (double) 0;
/* 092 */     }
/* 093 */     agg_bufIsNull1 = agg_isNull1;
/* 094 */     agg_bufValue1 = agg_value1;
/* 095 */     agg_bufIsNull2 = false;
/* 096 */     agg_bufValue2 = 0L;
/* 097 */
/* 098 */     while (inputadapter_input.hasNext() && !stopEarly()) {
/* 099 */       InternalRow inputadapter_row = (InternalRow) inputadapter_input.next();
/* 100 */       boolean inputadapter_isNull = inputadapter_row.isNullAt(0);
/* 101 */       long inputadapter_value = inputadapter_isNull ? -1L : (inputadapter_row.getLong(0));
/* 102 */       boolean inputadapter_isNull1 = inputadapter_row.isNullAt(1);
/* 103 */       double inputadapter_value1 = inputadapter_isNull1 ? -1.0 : (inputadapter_row.getDouble(1));
/* 104 */       boolean inputadapter_isNull2 = inputadapter_row.isNullAt(2);
/* 105 */       long inputadapter_value2 = inputadapter_isNull2 ? -1L : (inputadapter_row.getLong(2));
/* 106 */
/* 107 */       // do aggregate
/* 108 */       // common sub-expressions
/* 109 */
/* 110 */       // evaluate aggregate function
/* 111 */       boolean agg_isNull12 = true;
/* 112 */       long agg_value12 = -1L;
/* 113 */
/* 114 */       boolean agg_isNull13 = agg_bufIsNull;
/* 115 */       long agg_value13 = agg_bufValue;
/* 116 */       if (agg_isNull13) {
/* 117 */         boolean agg_isNull15 = false;
/* 118 */         long agg_value15 = -1L;
/* 119 */         if (!false) {
/* 120 */           agg_value15 = (long) 0;
/* 121 */         }
/* 122 */         if (!agg_isNull15) {
/* 123 */           agg_isNull13 = false;
/* 124 */           agg_value13 = agg_value15;
/* 125 */         }
/* 126 */       }
/* 127 */
/* 128 */       if (!inputadapter_isNull) {
/* 129 */         agg_isNull12 = false; // resultCode could change nullability.
/* 130 */         agg_value12 = agg_value13 + inputadapter_value;
/* 131 */
/* 132 */       }
/* 133 */       boolean agg_isNull11 = agg_isNull12;
/* 134 */       long agg_value11 = agg_value12;
/* 135 */       if (agg_isNull11) {
/* 136 */         if (!agg_bufIsNull) {
/* 137 */           agg_isNull11 = false;
/* 138 */           agg_value11 = agg_bufValue;
/* 139 */         }
/* 140 */       }
/* 141 */       boolean agg_isNull19 = true;
/* 142 */       double agg_value19 = -1.0;
/* 143 */
/* 144 */       if (!agg_bufIsNull1) {
/* 145 */         if (!inputadapter_isNull1) {
/* 146 */           agg_isNull19 = false; // resultCode could change nullability.
/* 147 */           agg_value19 = agg_bufValue1 + inputadapter_value1;
/* 148 */
/* 149 */         }
/* 150 */
/* 151 */       }
/* 152 */       boolean agg_isNull22 = true;
/* 153 */       long agg_value22 = -1L;
/* 154 */
/* 155 */       if (!agg_bufIsNull2) {
/* 156 */         if (!inputadapter_isNull2) {
/* 157 */           agg_isNull22 = false; // resultCode could change nullability.
/* 158 */           agg_value22 = agg_bufValue2 + inputadapter_value2;
/* 159 */
/* 160 */         }
/* 161 */
/* 162 */       }
/* 163 */       // update aggregation buffer
/* 164 */       agg_bufIsNull = agg_isNull11;
/* 165 */       agg_bufValue = agg_value11;
/* 166 */
/* 167 */       agg_bufIsNull1 = agg_isNull19;
/* 168 */       agg_bufValue1 = agg_value19;
/* 169 */
/* 170 */       agg_bufIsNull2 = agg_isNull22;
/* 171 */       agg_bufValue2 = agg_value22;
/* 172 */       if (shouldStop()) return;
/* 173 */     }
/* 174 */
/* 175 */   }
/* 176 */
/* 177 */ }

I also did performance checks:

$ ./bin/spark-shell --master=local[1]
scala> sql("SET spark.sql.shuffle.partitions=4")
scala> spark.range(10000000).selectExpr("id % 1024 AS a", "id AS b").write.saveAsTable("t")
scala> timer { sql("SELECT a, KURTOSIS(b) FROM t GROUP BY a").collect }

master w/ this pr, Avg. Elapsed Time: 2.520551837s
master, Avg. Elapsed Time: 54.029893146199996s
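(For reference, `timer` above is not a built-in spark-shell helper; a minimal sketch of such a helper, with the name and averaging behavior assumed rather than taken from this pr, could look like this:)

```
// Hypothetical helper, not part of this pr: runs a block a few times and
// prints the average elapsed time in seconds, roughly matching the output above.
def timer[R](block: => R, numIters: Int = 5): Unit = {
  val elapsedSecs = (0 until numIters).map { _ =>
    val start = System.nanoTime()
    block
    (System.nanoTime() - start) / 1e9
  }
  println(s"Avg. Elapsed Time: ${elapsedSecs.sum / numIters}s")
}
```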

How was this patch tested?

Added tests in WholeStageCodegenSuite.

@maropu
Member Author

maropu commented Aug 30, 2017

I checked the TPCDS performance;
https://docs.google.com/spreadsheets/d/1DJP8BC8aOE4Fp8t9XAYki0vauCTowt2D1n7lmNXI7xw/edit?usp=sharing
Q66 is noticeably faster, and the other queries perform almost the same as before.

@SparkQA

SparkQA commented Aug 30, 2017

Test build #81235 has finished for PR 19082 at commit 6dacad4.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@maropu
Member Author

maropu commented Aug 30, 2017

Jenkins, retest this please.

@SparkQA

SparkQA commented Aug 30, 2017

Test build #81248 has finished for PR 19082 at commit 6dacad4.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@viirya
Member

viirya commented Aug 30, 2017

Aggregation code is not the only source of too-long generated code. Any big enough query under the current whole-stage codegen can produce generated code that is too long.

Compared with a special-case split for aggregation code like this, I'd like to push #18931, which is a more general fix.

@maropu
Member Author

maropu commented Aug 30, 2017

I haven't looked into the code yet, but could #18931 also handle a query like the one below?

 sql("SELECT a, KURTOSIS(b)FROM t GROUP BY a")

@viirya
Member

viirya commented Aug 30, 2017

What do you mean by solving the query? Is it that its generated code is too long?

@maropu
Member Author

maropu commented Aug 30, 2017

Yes. Does #18931 cover all the cases this pr describes? (I feel these two prs solve different too-long generated-code issues.)

@viirya
Member

viirya commented Aug 30, 2017

After quickly scanning this change, I found that this and #18931 are actually orthogonal.

This PR splits the aggregation expressions into functions, so it prevents long aggregation code caused by expressions with long generated code, such as Kurtosis.

#18931 breaks up long generated code caused by a chain of execution operators, by splitting the operators' generated code into functions.

@maropu
Member Author

maropu commented Aug 30, 2017

Yeah, I'm with you; it seems they are orthogonal. But if we had a more general way to solve both, that would be best.

Member

Since $doAggVal is a non-static method, this number should be 255: the JVM allows at most 255 parameter slots per method, and the implicit `this` of an instance method occupies one of them.

Member Author

oh, good catch! I'll fix

Member

@kiszk kiszk Aug 31, 2017

Can we add a check for the case where a user specifies a value greater than 255?

Member Author

@maropu maropu Sep 1, 2017

This is a test-only option, so I don't think we need to.

Member

OuterReference actually has a special meaning in correlated subqueries. This name can be confusing.

Member Author

ok, I'll rename this.

Contributor

@rednaxelafx rednaxelafx left a comment

Nice! I like the direction this PR is going. Just some nits in inline comments:

Contributor

I know the regular expression is tempting, but there's actually a better way to do this along the lines of your idea, within the current framework.

I've got a piece of code sitting in my own workspace that checks for Java identifiers:

object CodegenContext {
  private val javaKeywords = Set(
    "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class",
    "const", "continue", "default", "do", "double", "else", "extends", "false", "final",
    "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", "int",
    "interface", "long", "native", "new", "null", "package", "private", "protected", "public",
    "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this",
    "throw", "throws", "transient", "true", "try", "void", "volatile", "while"
  )

  def isJavaIdentifier(str: String): Boolean = str match {
    case null | "" => false
    case _ => java.lang.Character.isJavaIdentifierStart(str.charAt(0)) &&
              (1 until str.length).forall(
                i => java.lang.Character.isJavaIdentifierPart(str.charAt(i))) &&
              !javaKeywords.contains(str)
  }
}

Feel free to use it here if you'd like. This is the way java.lang.Character.isJavaIdentifierStart() and java.lang.Character.isJavaIdentifierPart() are supposed to be used anyway; nothing creative.

If you want to use it in the way you're currently using the regular expression (in a pattern match), just wrap the util above in an unapply(). But I'd say simply defining def isVariable(nameId: String) = CodegenContext.isJavaIdentifier(nameId) is clean enough.
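(A minimal sketch of the unapply wrapper mentioned above, building on the isJavaIdentifier util from the previous snippet; this is illustrative only, not code from this pr:)

```
// Hypothetical extractor: lets call sites pattern-match on strings that are
// valid Java identifiers, e.g. when deciding whether a generated value is a
// plain variable reference.
object JavaIdentifier {
  def unapply(str: String): Option[String] =
    if (CodegenContext.isJavaIdentifier(str)) Some(str) else None
}

// usage sketch:
// exprCode.value match {
//   case JavaIdentifier(name) => // a variable; safe to pass as a method argument
//   case _                    => // a literal or a more complex code fragment
// }
```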

Member Author

Good suggestion; I was also looking for a better approach. I'll fix it.

Contributor

Hmm. Just a cosmetic style comment: I would have declared addIfNotLiteral with a def instead of making it a scala.Function2[String, String, Unit].

BTW, can we add a comment to val argSet explaining what the two fields of the Tuple2[String, String] mean? And also make this addIfNotLiteral function take the arguments in the same order as the tuple.
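(A small illustration of the two styles being discussed; `argSet` and `addIfNotLiteral` come from the review comment above, but the bodies and the literal check here are assumptions, not the pr's actual code:)

```
import scala.collection.mutable

object AddIfNotLiteralSketch {
  // (value, isNull) pairs of non-literal arguments, in that order.
  val argSet = mutable.LinkedHashSet[(String, String)]()

  // Placeholder literal check, for illustration only.
  private def isLiteral(code: String): Boolean =
    code == "true" || code == "false" || (code.nonEmpty && code.forall(_.isDigit))

  // Current style under discussion: a Function2 value.
  val addIfNotLiteralFn: (String, String) => Unit = (value, isNull) =>
    if (!isLiteral(value)) argSet += ((value, isNull))

  // Suggested style: a plain method, taking arguments in the same order as the tuple.
  def addIfNotLiteral(value: String, isNull: String): Unit =
    if (!isLiteral(value)) argSet += ((value, isNull))
}
```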

@maropu maropu force-pushed the SPARK-21870 branch 2 times, most recently from e432136 to 57c8280 on September 2, 2017 00:33
@SparkQA

SparkQA commented Sep 2, 2017

Test build #81330 has finished for PR 19082 at commit e432136.

  • This patch fails Scala style tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Sep 2, 2017

Test build #81331 has finished for PR 19082 at commit 57c8280.

  • This patch fails Scala style tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Sep 2, 2017

Test build #81329 has finished for PR 19082 at commit abc5450.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Sep 2, 2017

Test build #81328 has finished for PR 19082 at commit 78d0a00.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Sep 2, 2017

Test build #81332 has finished for PR 19082 at commit e3722a0.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Sep 3, 2017

Test build #81351 has finished for PR 19082 at commit a2b3dd3.

  • This patch fails due to an unknown error code, -9.
  • This patch merges cleanly.
  • This patch adds no public classes.

@maropu
Member Author

maropu commented Sep 3, 2017

Jenkins, retest this please.

@SparkQA

SparkQA commented Sep 3, 2017

Test build #81354 has finished for PR 19082 at commit a2b3dd3.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@maropu
Member Author

maropu commented Sep 3, 2017

@viirya @rednaxelafx @kiszk okay, could you check again?

Member

@kiszk kiszk Sep 5, 2017

If line 314 uses <=, this should be 254. In the previous commit, < was used.
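(For context on the 254 vs. 255 discussion above, a hedged sketch; the names below are assumptions, not identifiers from this pr. The JVM allows at most 255 parameter slots per method, the implicit `this` of a non-static method takes one slot, and `long`/`double` arguments take two slots each.)

```
// Hypothetical guard, illustrative only.
object ParamLimitSketch {
  // 255 JVM parameter slots minus the implicit `this` of a non-static method.
  val MaxDeclaredParamSlotsForNonStaticMethod = 254

  // True if a generated non-static method with this many declared parameter
  // slots stays within the JVM limit (note the <=, matching the comment above).
  def fitsNonStaticMethod(declaredParamSlots: Int): Boolean =
    declaredParamSlots <= MaxDeclaredParamSlotsForNonStaticMethod
}
```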

@SparkQA

SparkQA commented Sep 5, 2017

Test build #81400 has finished for PR 19082 at commit 6c502a9.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Dec 11, 2017

Test build #84717 has finished for PR 19082 at commit 1ed010f.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@kiszk
Member

kiszk commented Dec 11, 2017

Jenkins, retest this please

@SparkQA

SparkQA commented Dec 11, 2017

Test build #84724 has finished for PR 19082 at commit 1ed010f.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@gatorsmile
Member

Just FYI, the test failure is not related to this PR.

ctx.currentVars = bufVars ++ input

// We need to copy the aggregation buffer to local variables first because each aggregate
// function directly updates the buffer when it finishes.
Member Author

Just FYI: we do need the local copies; see this discussion too: #19865
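(An illustrative sketch only, not the actual generated code, of why the buffer is copied to locals first; all names below are made up:)

```
// Illustrative only: once each aggregate's update is split into its own method
// that writes the buffer as soon as it finishes, any later update in the same
// row must still see the buffer values from the start of the row, so they are
// snapshotted into locals before the split-out methods run.
object LocalBufferCopySketch {
  var bufSum = 0L
  var bufCount = 0L

  def updateSum(localSum: Long, v: Long): Unit = { bufSum = localSum + v }

  // Suppose this update expression also reads the sum slot: it has to use the
  // snapshot, not the value updateSum just wrote for this row.
  def updateCount(localCount: Long, localSumAtRowStart: Long): Unit = {
    bufCount = localCount + (if (localSumAtRowStart == 0L) 0L else 1L)
  }

  def consumeRow(v: Long): Unit = {
    // copy the aggregation buffer to local variables first
    val localSum = bufSum
    val localCount = bufCount
    updateSum(localSum, v)
    updateCount(localCount, localSum)
  }
}
```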

@SparkQA

SparkQA commented Dec 12, 2017

Test build #84734 has finished for PR 19082 at commit 0e5d366.

  • This patch fails SparkR unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@maropu
Member Author

maropu commented Dec 12, 2017

retest this please

@SparkQA

SparkQA commented Dec 12, 2017

Test build #84770 has finished for PR 19082 at commit 0e5d366.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Dec 12, 2017

Test build #84774 has finished for PR 19082 at commit 4077608.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA

SparkQA commented Dec 13, 2017

Test build #84807 has finished for PR 19082 at commit 2555e5a.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@maropu
Member Author

maropu commented Dec 13, 2017

Since #19813 fixes this issue, I'll close this. Thanks!

@maropu maropu closed this Dec 13, 2017
@maropu
Member Author

maropu commented Dec 13, 2017

@gatorsmile can you close the jira ticket with @viirya assigned? Thanks!

val updateRowInRegularHashMap: String = {
  ctx.INPUT_ROW = unsafeRowBuffer
  // We need to copy the aggregation row buffer to a local row first because each aggregate
  // function directly updates the buffer when it finishes.
Contributor

Why does this matter? We should avoid unnecessary data copies as much as we can.

Member Author

We need this copy because: #19082 (comment)

@cloud-fan
Contributor

Sorry for missing the excellent discussions here. After more thought, I feel this patch still makes sense. Currently we have 3 cases that may cause large methods with whole-stage codegen:

All of them are orthogonal but non-trivial; I don't see how a fix for any one of them could cover all the others.

Contributor

@rednaxelafx rednaxelafx left a comment

Rebasing this PR to the latest master would be interestingly involved...

stack.pop() match {
  case e if subExprs.contains(e) =>
    val exprCode = subExprs(e)
    if (CodegenContext.isJavaIdentifier(exprCode.value)) {
Contributor

Once we have @viirya 's #20043 merged we won't need the ugly CodegenContext.isJavaIdentifier hack any more >_<|||

Member Author

hey, good news! Thanks for letting me know ;)

@kevinjmh
Member

kevinjmh commented Mar 5, 2019

Since #19813 fixes this issue, I'll close this. Thanks!

I got here from the JIRA ticket SPARK-23791, which contains a test case.
#19813 was merged and then reverted. From the discussion under #19813, #21193 was raised.
But when I run the test case on Spark 2.4, I don't think the issue is fixed.

Test results of 3 rounds (codegen on, codegen off):
(80.807065794, 13.359716698)
(73.170440475, 10.728186286)
(72.61997856, 10.436293345)

@maropu
Member Author

maropu commented Mar 5, 2019

Yeah, this issue is not fixed yet. As you know, it's a long story...
I closed this pr because it was just a band-aid fix. I'm not 100% sure of the current status, but IIUC we still don't have a global consensus in the community about how to fix this.
@gatorsmile and @cloud-fan probably know more.

@kevinjmh
Member

kevinjmh commented Mar 5, 2019

Thanks for your reply.
@gatorsmile @cloud-fan, is there any update on the root cause and a solution for this problem? (Performance degrades when running an aggregation query on many columns with codegen enabled.)

cloud-fan pushed a commit that referenced this pull request Sep 6, 2019
## What changes were proposed in this pull request?
This pr proposes to split the aggregation code into small functions in `HashAggregateExec`. In #18810, we saw a performance regression when the JVM does not JIT-compile functions that are too long. I checked and found that the code generated for `HashAggregateExec` frequently goes over the limit when a query has many aggregate functions (e.g., q66 in TPCDS).

The current master places all the generated aggregation code in a single function. In this pr, I modified the code to assign an individual function to each aggregate function (e.g., `SUM` and `AVG`). For example, for the query `SELECT SUM(a), AVG(a) FROM VALUES(1) t(a)`, the proposed code defines two functions, for `SUM(a)` and `AVG(a)`, as follows:

- generated code with this pr (https://gist.github.com/maropu/812990012bc967a78364be0fa793f559):
```
/* 173 */   private void agg_doConsume_0(InternalRow inputadapter_row_0, long agg_expr_0_0, boolean agg_exprIsNull_0_0, double agg_expr_1_0, boolean agg_exprIsNull_1_0, long agg_expr_2_0, boolean agg_exprIsNull_2_0) throws java.io.IOException {
/* 174 */     // do aggregate
/* 175 */     // common sub-expressions
/* 176 */
/* 177 */     // evaluate aggregate functions and update aggregation buffers
/* 178 */     agg_doAggregate_sum_0(agg_exprIsNull_0_0, agg_expr_0_0);
/* 179 */     agg_doAggregate_avg_0(agg_expr_1_0, agg_exprIsNull_1_0, agg_exprIsNull_2_0, agg_expr_2_0);
/* 180 */
/* 181 */   }
...
/* 071 */   private void agg_doAggregate_avg_0(double agg_expr_1_0, boolean agg_exprIsNull_1_0, boolean agg_exprIsNull_2_0, long agg_expr_2_0) throws java.io.IOException {
/* 072 */     // do aggregate for avg
/* 073 */     // evaluate aggregate function
/* 074 */     boolean agg_isNull_19 = true;
/* 075 */     double agg_value_19 = -1.0;
...
/* 114 */   private void agg_doAggregate_sum_0(boolean agg_exprIsNull_0_0, long agg_expr_0_0) throws java.io.IOException {
/* 115 */     // do aggregate for sum
/* 116 */     // evaluate aggregate function
/* 117 */     agg_agg_isNull_11_0 = true;
/* 118 */     long agg_value_11 = -1L;
```

- generated code in the current master (https://gist.github.com/maropu/e9d772af2c98d8991a6a5f0af7841760)
```
/* 059 */   private void agg_doConsume_0(InternalRow localtablescan_row_0, int agg_expr_0_0) throws java.io.IOException {
/* 060 */     // do aggregate
/* 061 */     // common sub-expressions
/* 062 */     boolean agg_isNull_4 = false;
/* 063 */     long agg_value_4 = -1L;
/* 064 */     if (!false) {
/* 065 */       agg_value_4 = (long) agg_expr_0_0;
/* 066 */     }
/* 067 */     // evaluate aggregate function
/* 068 */     agg_agg_isNull_7_0 = true;
/* 069 */     long agg_value_7 = -1L;
/* 070 */     do {
/* 071 */       if (!agg_bufIsNull_0) {
/* 072 */         agg_agg_isNull_7_0 = false;
/* 073 */         agg_value_7 = agg_bufValue_0;
/* 074 */         continue;
/* 075 */       }
/* 076 */
/* 077 */       boolean agg_isNull_9 = false;
/* 078 */       long agg_value_9 = -1L;
/* 079 */       if (!false) {
/* 080 */         agg_value_9 = (long) 0;
/* 081 */       }
/* 082 */       if (!agg_isNull_9) {
/* 083 */         agg_agg_isNull_7_0 = false;
/* 084 */         agg_value_7 = agg_value_9;
/* 085 */         continue;
/* 086 */       }
/* 087 */
/* 088 */     } while (false);
/* 089 */
/* 090 */     long agg_value_6 = -1L;
/* 091 */
/* 092 */     agg_value_6 = agg_value_7 + agg_value_4;
/* 093 */     boolean agg_isNull_11 = true;
/* 094 */     double agg_value_11 = -1.0;
/* 095 */
/* 096 */     if (!agg_bufIsNull_1) {
/* 097 */       agg_agg_isNull_13_0 = true;
/* 098 */       double agg_value_13 = -1.0;
/* 099 */       do {
/* 100 */         boolean agg_isNull_14 = agg_isNull_4;
/* 101 */         double agg_value_14 = -1.0;
/* 102 */         if (!agg_isNull_4) {
/* 103 */           agg_value_14 = (double) agg_value_4;
/* 104 */         }
/* 105 */         if (!agg_isNull_14) {
/* 106 */           agg_agg_isNull_13_0 = false;
/* 107 */           agg_value_13 = agg_value_14;
/* 108 */           continue;
/* 109 */         }
/* 110 */
/* 111 */         boolean agg_isNull_15 = false;
/* 112 */         double agg_value_15 = -1.0;
/* 113 */         if (!false) {
/* 114 */           agg_value_15 = (double) 0;
/* 115 */         }
/* 116 */         if (!agg_isNull_15) {
/* 117 */           agg_agg_isNull_13_0 = false;
/* 118 */           agg_value_13 = agg_value_15;
/* 119 */           continue;
/* 120 */         }
/* 121 */
/* 122 */       } while (false);
/* 123 */
/* 124 */       agg_isNull_11 = false; // resultCode could change nullability.
/* 125 */
/* 126 */       agg_value_11 = agg_bufValue_1 + agg_value_13;
/* 127 */
/* 128 */     }
/* 129 */     boolean agg_isNull_17 = false;
/* 130 */     long agg_value_17 = -1L;
/* 131 */     if (!false && agg_isNull_4) {
/* 132 */       agg_isNull_17 = agg_bufIsNull_2;
/* 133 */       agg_value_17 = agg_bufValue_2;
/* 134 */     } else {
/* 135 */       boolean agg_isNull_20 = true;
/* 136 */       long agg_value_20 = -1L;
/* 137 */
/* 138 */       if (!agg_bufIsNull_2) {
/* 139 */         agg_isNull_20 = false; // resultCode could change nullability.
/* 140 */
/* 141 */         agg_value_20 = agg_bufValue_2 + 1L;
/* 142 */
/* 143 */       }
/* 144 */       agg_isNull_17 = agg_isNull_20;
/* 145 */       agg_value_17 = agg_value_20;
/* 146 */     }
/* 147 */     // update aggregation buffer
/* 148 */     agg_bufIsNull_0 = false;
/* 149 */     agg_bufValue_0 = agg_value_6;
/* 150 */
/* 151 */     agg_bufIsNull_1 = agg_isNull_11;
/* 152 */     agg_bufValue_1 = agg_value_11;
/* 153 */
/* 154 */     agg_bufIsNull_2 = agg_isNull_17;
/* 155 */     agg_bufValue_2 = agg_value_17;
/* 156 */
/* 157 */   }
```
You can check the previous discussion in #19082

## How was this patch tested?
Existing tests

Closes #20965 from maropu/SPARK-21870-2.

Authored-by: Takeshi Yamamuro <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
PavithraRamachandran pushed a commit to PavithraRamachandran/spark that referenced this pull request Sep 15, 2019