[SPARK-21870][SQL] Split aggregation code into small functions #19082
Conversation
I checked the TPCDS performance;
Test build #81235 has finished for PR 19082 at commit
Jenkins, retest this please.
Test build #81248 has finished for PR 19082 at commit
Aggregation is not the only source of overly long generated code; any sufficiently large query under the current whole-stage codegen can produce generated code that is too long. Rather than a special-case split for aggregation code like this, I'd prefer to push #18931, which is a more general fix.
I haven't looked into the code yet, but could #18931 also solve a query like the one below?
What does it mean to solve the query? Is the issue that the generated code is too long?
Yes, and does #18931 cover all the cases this PR addresses? (I feel these two PRs solve different too-long generated code issues.)
After quickly scanning this change, I found that this and #18931 are actually orthogonal. This PR splits the aggregation expressions into functions, so it prevents long aggregation code caused by long generated expression code. In contrast, #18931 breaks up the long generated code caused by chains of execution operators, by splitting each operator's generated code into functions.
Yeah, I'm with you; it seems they are orthogonal. But if we had a more general way to solve both, that would be best.
Since `$doAggVal` is a non-static method, this number should be 255.
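For background: the JVM caps a method at 255 parameter slots, where the implicit `this` of an instance (non-static) method takes one slot and `long`/`double` parameters take two slots each (JVMS §4.3.3). A hypothetical sketch of that bound, with illustrative names not taken from the PR:

```java
// Hypothetical helper illustrating the JVM's 255-parameter-slot limit
// (JVMS §4.3.3). Names are illustrative, not from Spark's codegen.
public class ParamSlotCheck {
    // Returns true if a method with the given parameter mix fits the limit.
    static boolean fitsJvmParamLimit(int longOrDoubleParams, int otherParams, boolean isStatic) {
        int slots = (isStatic ? 0 : 1)      // implicit `this` for instance methods
                  + 2 * longOrDoubleParams  // long/double occupy two slots each
                  + otherParams;            // everything else occupies one slot
        return slots <= 255;
    }
}
```

This is why an instance method like the generated `doAggVal` can take fewer declared parameters than a static one before hitting the limit.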
Oh, good catch! I'll fix it.
Can we add a check for the case where a user specifies a value greater than 255?
This is a test-only option, so I don't think we need to.
`OuterReference` actually has a special meaning in correlated subqueries, so this name can be confusing.
OK, I'll rename it.
rednaxelafx left a comment:
Nice! I like the direction this PR is going. Just some nits in inline comments:
I know the regular expression is tempting, but there's actually a better way to do this along the lines of your idea, within the current framework.
I've got a piece of code sitting in my own workspace that checks for Java identifiers:
```scala
object CodegenContext {
  private val javaKeywords = Set(
    "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class",
    "const", "continue", "default", "do", "double", "else", "extends", "false", "final",
    "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", "int",
    "interface", "long", "native", "new", "null", "package", "private", "protected", "public",
    "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this",
    "throw", "throws", "transient", "true", "try", "void", "volatile", "while"
  )

  def isJavaIdentifier(str: String): Boolean = str match {
    case null | "" => false
    case _ => java.lang.Character.isJavaIdentifierStart(str.charAt(0)) &&
      (1 until str.length).forall(
        i => java.lang.Character.isJavaIdentifierPart(str.charAt(i))) &&
      !javaKeywords.contains(str)
  }
}
```

Feel free to use it here if you'd like. This is the way `java.lang.Character.isJavaIdentifierStart()` and `java.lang.Character.isJavaIdentifierPart()` are supposed to be used anyway; nothing creative.
If you want to use it the way you're using the regular expression, just wrap the util above in an `unapply()`. But I'd say simply defining `def isVariable(nameId: String) = CodegenContext.isJavaIdentifier(nameId)` is clean enough.
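For illustration, here is a Java rendering of the same identifier check (a sketch; the actual Spark helper suggested above is Scala). It rejects null or empty strings, Java keywords and literals, and anything that is not a well-formed Java identifier:

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Java sketch of the identifier check suggested above (Spark's helper is Scala).
public class JavaIdentifiers {
    private static final Set<String> JAVA_KEYWORDS = new HashSet<>(Arrays.asList(
        "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class",
        "const", "continue", "default", "do", "double", "else", "extends", "false", "final",
        "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", "int",
        "interface", "long", "native", "new", "null", "package", "private", "protected",
        "public", "return", "short", "static", "strictfp", "super", "switch", "synchronized",
        "this", "throw", "throws", "transient", "true", "try", "void", "volatile", "while"));

    static boolean isJavaIdentifier(String s) {
        if (s == null || s.isEmpty() || JAVA_KEYWORDS.contains(s)) return false;
        if (!Character.isJavaIdentifierStart(s.charAt(0))) return false;
        for (int i = 1; i < s.length(); i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) return false;
        }
        return true;
    }
}
```

For example, a generated variable name like `agg_value_0` passes, while `class` or `1abc` is rejected.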
Good suggestion; I was also looking for a better approach. I'll fix it.
Hmm. Just a cosmetic style comment: I would have declared `addIfNotLiteral` with a `def` instead of making it a `scala.Function2[String, String, Unit]`.
BTW, can we add a comment to `val argSet` explaining what the two fields of the `Tuple2[String, String]` mean? And then also make `addIfNotLiteral` take its arguments in the same order as the tuple.
Force-pushed from e432136 to 57c8280.
Test build #81330 has finished for PR 19082 at commit
Test build #81331 has finished for PR 19082 at commit
Test build #81329 has finished for PR 19082 at commit
Test build #81328 has finished for PR 19082 at commit
Test build #81332 has finished for PR 19082 at commit
Test build #81351 has finished for PR 19082 at commit
Jenkins, retest this please.
Test build #81354 has finished for PR 19082 at commit
@viirya @rednaxelafx @kiszk OK, could you check again?
If line 314 uses `<=`, this should be 254; in the previous commit, `<` was used.
Test build #81400 has finished for PR 19082 at commit
Test build #84717 has finished for PR 19082 at commit
Jenkins, retest this please.
Test build #84724 has finished for PR 19082 at commit
Just FYI, the test failure is not related to this PR.
```scala
ctx.currentVars = bufVars ++ input
// We need to copy the aggregation buffer to local variables first because each aggregate
// function directly updates the buffer when it finishes.
```
Just FYI: we do need the local copies, per this discussion too: #19865
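To illustrate the hazard behind that comment, here is a minimal sketch with hypothetical names (not Spark's actual generated code): each split aggregate function writes its result straight into a shared buffer field when it finishes, so a later function that reads the buffer directly would observe the already-updated value. Copying the buffer to locals first keeps every function working on the row's original buffer values:

```java
// Minimal sketch of why the generated code copies the aggregation buffer
// to local variables first. Field and method names are hypothetical.
public class BufferCopySketch {
    static long buf0 = 10L;  // shared aggregation buffer slot read by both functions

    // Each split function updates the buffer when it finishes.
    static void doAggA(long local0) { buf0 = local0 + 1; }
    static long doAggB(long local0) { return local0 * 2; }

    // Correct shape: copy the buffer to a local, then call the functions.
    static long consumeWithLocalCopy() {
        buf0 = 10L;
        long local0 = buf0;      // local copy taken before any update
        doAggA(local0);
        return doAggB(local0);   // still sees the original value
    }

    // Buggy shape: reading the buffer directly between updates.
    static long consumeWithoutCopy() {
        buf0 = 10L;
        doAggA(buf0);            // buffer is mutated here
        return doAggB(buf0);     // sees the already-updated value
    }
}
```

With the local copy the second function computes from the original buffer value (10), while the direct-read version computes from the updated one (11), which is exactly the corruption the comment guards against.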
Test build #84734 has finished for PR 19082 at commit
Retest this please.
Test build #84770 has finished for PR 19082 at commit
Test build #84774 has finished for PR 19082 at commit
Test build #84807 has finished for PR 19082 at commit
Since #19813 fixes this issue, I'll close this one. Thanks!
@gatorsmile can you close the JIRA ticket with @viirya assigned? Thanks!
```scala
val updateRowInRegularHashMap: String = {
  ctx.INPUT_ROW = unsafeRowBuffer
  // We need to copy the aggregation row buffer to a local row first because each aggregate
  // function directly updates the buffer when it finishes.
```
Why does this matter? We should avoid unnecessary data copies as much as possible.
We need this copy; see #19082 (comment).
Sorry for missing the excellent discussions here. After more thought, I feel this patch still makes sense. Currently we have 3 cases that may cause large methods with whole-stage codegen:
All of them are orthogonal but non-trivial; I don't see how any one of them can cover all the other cases.
rednaxelafx left a comment:
Rebasing this PR to the latest master would be interestingly involved...
```scala
stack.pop() match {
  case e if subExprs.contains(e) =>
    val exprCode = subExprs(e)
    if (CodegenContext.isJavaIdentifier(exprCode.value)) {
```
hey, good news! Thanks for letting me know ;)
I tracked this down from the JIRA ticket SPARK-23791, which contains a test case. Test results of 3 rounds:
Yeah, this issue is not fixed yet. As you know, it's a long story...
Thanks for your reply.
## What changes were proposed in this pull request?

This PR proposes to split aggregation code into small functions in `HashAggregateExec`. In #18810, we got a performance regression when JVMs didn't compile too-long functions. I checked and found that the codegen of `HashAggregateExec` frequently goes over the limit when a query has many aggregate functions (e.g., q66 in TPCDS). The current master places all the generated aggregation code in a single function. In this PR, I modified the code to assign an individual function to each aggregate function (e.g., `SUM` and `AVG`). For example, for the query `SELECT SUM(a), AVG(a) FROM VALUES(1) t(a)`, the proposed code defines two functions for `SUM(a)` and `AVG(a)` as follows:

- generated code with this PR (https://gist.github.com/maropu/812990012bc967a78364be0fa793f559):

```java
/* 173 */   private void agg_doConsume_0(InternalRow inputadapter_row_0, long agg_expr_0_0, boolean agg_exprIsNull_0_0, double agg_expr_1_0, boolean agg_exprIsNull_1_0, long agg_expr_2_0, boolean agg_exprIsNull_2_0) throws java.io.IOException {
/* 174 */     // do aggregate
/* 175 */     // common sub-expressions
/* 176 */
/* 177 */     // evaluate aggregate functions and update aggregation buffers
/* 178 */     agg_doAggregate_sum_0(agg_exprIsNull_0_0, agg_expr_0_0);
/* 179 */     agg_doAggregate_avg_0(agg_expr_1_0, agg_exprIsNull_1_0, agg_exprIsNull_2_0, agg_expr_2_0);
/* 180 */
/* 181 */   }
...
/* 071 */   private void agg_doAggregate_avg_0(double agg_expr_1_0, boolean agg_exprIsNull_1_0, boolean agg_exprIsNull_2_0, long agg_expr_2_0) throws java.io.IOException {
/* 072 */     // do aggregate for avg
/* 073 */     // evaluate aggregate function
/* 074 */     boolean agg_isNull_19 = true;
/* 075 */     double agg_value_19 = -1.0;
...
/* 114 */   private void agg_doAggregate_sum_0(boolean agg_exprIsNull_0_0, long agg_expr_0_0) throws java.io.IOException {
/* 115 */     // do aggregate for sum
/* 116 */     // evaluate aggregate function
/* 117 */     agg_agg_isNull_11_0 = true;
/* 118 */     long agg_value_11 = -1L;
...
```

- generated code in the current master (https://gist.github.com/maropu/e9d772af2c98d8991a6a5f0af7841760):

```java
/* 059 */   private void agg_doConsume_0(InternalRow localtablescan_row_0, int agg_expr_0_0) throws java.io.IOException {
/* 060 */     // do aggregate
/* 061 */     // common sub-expressions
/* 062 */     boolean agg_isNull_4 = false;
/* 063 */     long agg_value_4 = -1L;
/* 064 */     if (!false) {
/* 065 */       agg_value_4 = (long) agg_expr_0_0;
/* 066 */     }
/* 067 */     // evaluate aggregate function
/* 068 */     agg_agg_isNull_7_0 = true;
/* 069 */     long agg_value_7 = -1L;
/* 070 */     do {
/* 071 */       if (!agg_bufIsNull_0) {
/* 072 */         agg_agg_isNull_7_0 = false;
/* 073 */         agg_value_7 = agg_bufValue_0;
/* 074 */         continue;
/* 075 */       }
/* 076 */
/* 077 */       boolean agg_isNull_9 = false;
/* 078 */       long agg_value_9 = -1L;
/* 079 */       if (!false) {
/* 080 */         agg_value_9 = (long) 0;
/* 081 */       }
/* 082 */       if (!agg_isNull_9) {
/* 083 */         agg_agg_isNull_7_0 = false;
/* 084 */         agg_value_7 = agg_value_9;
/* 085 */         continue;
/* 086 */       }
/* 087 */
/* 088 */     } while (false);
/* 089 */
/* 090 */     long agg_value_6 = -1L;
/* 091 */
/* 092 */     agg_value_6 = agg_value_7 + agg_value_4;
/* 093 */     boolean agg_isNull_11 = true;
/* 094 */     double agg_value_11 = -1.0;
/* 095 */
/* 096 */     if (!agg_bufIsNull_1) {
/* 097 */       agg_agg_isNull_13_0 = true;
/* 098 */       double agg_value_13 = -1.0;
/* 099 */       do {
/* 100 */         boolean agg_isNull_14 = agg_isNull_4;
/* 101 */         double agg_value_14 = -1.0;
/* 102 */         if (!agg_isNull_4) {
/* 103 */           agg_value_14 = (double) agg_value_4;
/* 104 */         }
/* 105 */         if (!agg_isNull_14) {
/* 106 */           agg_agg_isNull_13_0 = false;
/* 107 */           agg_value_13 = agg_value_14;
/* 108 */           continue;
/* 109 */         }
/* 110 */
/* 111 */         boolean agg_isNull_15 = false;
/* 112 */         double agg_value_15 = -1.0;
/* 113 */         if (!false) {
/* 114 */           agg_value_15 = (double) 0;
/* 115 */         }
/* 116 */         if (!agg_isNull_15) {
/* 117 */           agg_agg_isNull_13_0 = false;
/* 118 */           agg_value_13 = agg_value_15;
/* 119 */           continue;
/* 120 */         }
/* 121 */
/* 122 */       } while (false);
/* 123 */
/* 124 */       agg_isNull_11 = false; // resultCode could change nullability.
/* 125 */
/* 126 */       agg_value_11 = agg_bufValue_1 + agg_value_13;
/* 127 */
/* 128 */     }
/* 129 */     boolean agg_isNull_17 = false;
/* 130 */     long agg_value_17 = -1L;
/* 131 */     if (!false && agg_isNull_4) {
/* 132 */       agg_isNull_17 = agg_bufIsNull_2;
/* 133 */       agg_value_17 = agg_bufValue_2;
/* 134 */     } else {
/* 135 */       boolean agg_isNull_20 = true;
/* 136 */       long agg_value_20 = -1L;
/* 137 */
/* 138 */       if (!agg_bufIsNull_2) {
/* 139 */         agg_isNull_20 = false; // resultCode could change nullability.
/* 140 */
/* 141 */         agg_value_20 = agg_bufValue_2 + 1L;
/* 142 */
/* 143 */       }
/* 144 */       agg_isNull_17 = agg_isNull_20;
/* 145 */       agg_value_17 = agg_value_20;
/* 146 */     }
/* 147 */     // update aggregation buffer
/* 148 */     agg_bufIsNull_0 = false;
/* 149 */     agg_bufValue_0 = agg_value_6;
/* 150 */
/* 151 */     agg_bufIsNull_1 = agg_isNull_11;
/* 152 */     agg_bufValue_1 = agg_value_11;
/* 153 */
/* 154 */     agg_bufIsNull_2 = agg_isNull_17;
/* 155 */     agg_bufValue_2 = agg_value_17;
/* 156 */
/* 157 */   }
```

You can check the previous discussion in #19082.

## How was this patch tested?

Existing tests.

Closes #20965 from maropu/SPARK-21870-2.

Authored-by: Takeshi Yamamuro <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
What changes were proposed in this pull request?
This PR proposes to split aggregation code into pieces in `HashAggregateExec`. In #18810, we got a performance regression when JVMs didn't compile too-long functions (the limit is 8000 in bytecode size). I checked and found that the codegen of `HashAggregateExec` frequently goes over the limit; for example, this query goes over the limit, and its actual bytecode size is 12356. This PR splits the aggregation code into small separate functions, and in a simple example;
I also did performance checks;
How was this patch tested?
Added tests in `WholeStageCodegenSuite`.