Skip to content

Commit b4c26e3

Browse files
kiszkrdblue
authored andcommitted
[SPARK-21720][SQL] Fix 64KB JVM bytecode limit problem with AND or OR
This PR changes `AND` or `OR` code generation to place condition and then expressions' generated code into separated methods if these size could be large. When the method is newly generated, variables for `isNull` and `value` are declared as an instance variable to pass these values (e.g. `isNull1409` and `value1409`) to the callers of the generated method. This PR resolved two cases: * large code size of left expression * large code size of right expression Added a new test case into `CodeGenerationSuite` Author: Kazuaki Ishizaki <[email protected]> Closes apache#18972 from kiszk/SPARK-21720. (cherry picked from commit 9bf696d) Signed-off-by: Wenchen Fan <[email protected]>
1 parent 27e638e commit b4c26e3

File tree

4 files changed

+152
-28
lines changed

4 files changed

+152
-28
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,36 @@ class CodegenContext {
692692
}
693693
}
694694

695+
/**
696+
* Wrap the generated code of expression, which was created from a row object in INPUT_ROW,
697+
* by a function. ev.isNull and ev.value are passed by global variables
698+
*
699+
* @param ev the code to evaluate expressions.
700+
* @param dataType the data type of ev.value.
701+
* @param baseFuncName the split function name base.
702+
*/
703+
def createAndAddFunction(
704+
ev: ExprCode,
705+
dataType: DataType,
706+
baseFuncName: String): (String, String, String) = {
707+
val globalIsNull = freshName("isNull")
708+
addMutableState("boolean", globalIsNull, s"$globalIsNull = false;")
709+
val globalValue = freshName("value")
710+
addMutableState(javaType(dataType), globalValue,
711+
s"$globalValue = ${defaultValue(dataType)};")
712+
val funcName = freshName(baseFuncName)
713+
val funcBody =
714+
s"""
715+
|private void $funcName(InternalRow ${INPUT_ROW}) {
716+
| ${ev.code.trim}
717+
| $globalIsNull = ${ev.isNull};
718+
| $globalValue = ${ev.value};
719+
|}
720+
""".stripMargin
721+
addNewFunction(funcName, funcBody)
722+
(funcName, globalIsNull, globalValue)
723+
}
724+
695725
/**
696726
* Perform a function which generates a sequence of ExprCodes with a given mapping between
697727
* expressions and common expressions, instead of using the mapping in current context.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
7272
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
7373

7474
val (condFuncName, condGlobalIsNull, condGlobalValue) =
75-
createAndAddFunction(ctx, condEval, predicate.dataType, "evalIfCondExpr")
75+
ctx.createAndAddFunction(condEval, predicate.dataType, "evalIfCondExpr")
7676
val (trueFuncName, trueGlobalIsNull, trueGlobalValue) =
77-
createAndAddFunction(ctx, trueEval, trueValue.dataType, "evalIfTrueExpr")
77+
ctx.createAndAddFunction(trueEval, trueValue.dataType, "evalIfTrueExpr")
7878
val (falseFuncName, falseGlobalIsNull, falseGlobalValue) =
79-
createAndAddFunction(ctx, falseEval, falseValue.dataType, "evalIfFalseExpr")
79+
ctx.createAndAddFunction(falseEval, falseValue.dataType, "evalIfFalseExpr")
8080
s"""
8181
$condFuncName(${ctx.INPUT_ROW});
8282
boolean ${ev.isNull} = false;
@@ -112,29 +112,6 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
112112
ev.copy(code = generatedCode)
113113
}
114114

115-
private def createAndAddFunction(
116-
ctx: CodegenContext,
117-
ev: ExprCode,
118-
dataType: DataType,
119-
baseFuncName: String): (String, String, String) = {
120-
val globalIsNull = ctx.freshName("isNull")
121-
ctx.addMutableState("boolean", globalIsNull, s"$globalIsNull = false;")
122-
val globalValue = ctx.freshName("value")
123-
ctx.addMutableState(ctx.javaType(dataType), globalValue,
124-
s"$globalValue = ${ctx.defaultValue(dataType)};")
125-
val funcName = ctx.freshName(baseFuncName)
126-
val funcBody =
127-
s"""
128-
|private void $funcName(InternalRow ${ctx.INPUT_ROW}) {
129-
| ${ev.code.trim}
130-
| $globalIsNull = ${ev.isNull};
131-
| $globalValue = ${ev.value};
132-
|}
133-
""".stripMargin
134-
ctx.addNewFunction(funcName, funcBody)
135-
(funcName, globalIsNull, globalValue)
136-
}
137-
138115
override def toString: String = s"if ($predicate) $trueValue else $falseValue"
139116

140117
override def sql: String = s"(IF(${predicate.sql}, ${trueValue.sql}, ${falseValue.sql}))"

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,46 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with
291291
val eval2 = right.genCode(ctx)
292292

293293
// The result should be `false`, if any of them is `false` whenever the other is null or not.
294-
if (!left.nullable && !right.nullable) {
294+
295+
// place generated code of eval1 and eval2 in separate methods if their code combined is large
296+
val combinedLength = eval1.code.length + eval2.code.length
297+
if (combinedLength > 1024 &&
298+
// Split these expressions only if they are created from a row object
299+
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
300+
301+
val (eval1FuncName, eval1GlobalIsNull, eval1GlobalValue) =
302+
ctx.createAndAddFunction(eval1, BooleanType, "eval1Expr")
303+
val (eval2FuncName, eval2GlobalIsNull, eval2GlobalValue) =
304+
ctx.createAndAddFunction(eval2, BooleanType, "eval2Expr")
305+
if (!left.nullable && !right.nullable) {
306+
val generatedCode = s"""
307+
$eval1FuncName(${ctx.INPUT_ROW});
308+
boolean ${ev.value} = false;
309+
if (${eval1GlobalValue}) {
310+
$eval2FuncName(${ctx.INPUT_ROW});
311+
${ev.value} = ${eval2GlobalValue};
312+
}
313+
"""
314+
ev.copy(code = generatedCode, isNull = "false")
315+
} else {
316+
val generatedCode = s"""
317+
$eval1FuncName(${ctx.INPUT_ROW});
318+
boolean ${ev.isNull} = false;
319+
boolean ${ev.value} = false;
320+
if (!${eval1GlobalIsNull} && !${eval1GlobalValue}) {
321+
} else {
322+
$eval2FuncName(${ctx.INPUT_ROW});
323+
if (!${eval2GlobalIsNull} && !${eval2GlobalValue}) {
324+
} else if (!${eval1GlobalIsNull} && !${eval2GlobalIsNull}) {
325+
${ev.value} = true;
326+
} else {
327+
${ev.isNull} = true;
328+
}
329+
}
330+
"""
331+
ev.copy(code = generatedCode)
332+
}
333+
} else if (!left.nullable && !right.nullable) {
295334
ev.copy(code = s"""
296335
${eval1.code}
297336
boolean ${ev.value} = false;
@@ -354,7 +393,46 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P
354393
val eval2 = right.genCode(ctx)
355394

356395
// The result should be `true`, if any of them is `true` whenever the other is null or not.
357-
if (!left.nullable && !right.nullable) {
396+
397+
// place generated code of eval1 and eval2 in separate methods if their code combined is large
398+
val combinedLength = eval1.code.length + eval2.code.length
399+
if (combinedLength > 1024 &&
400+
// Split these expressions only if they are created from a row object
401+
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
402+
403+
val (eval1FuncName, eval1GlobalIsNull, eval1GlobalValue) =
404+
ctx.createAndAddFunction(eval1, BooleanType, "eval1Expr")
405+
val (eval2FuncName, eval2GlobalIsNull, eval2GlobalValue) =
406+
ctx.createAndAddFunction(eval2, BooleanType, "eval2Expr")
407+
if (!left.nullable && !right.nullable) {
408+
val generatedCode = s"""
409+
$eval1FuncName(${ctx.INPUT_ROW});
410+
boolean ${ev.value} = true;
411+
if (!${eval1GlobalValue}) {
412+
$eval2FuncName(${ctx.INPUT_ROW});
413+
${ev.value} = ${eval2GlobalValue};
414+
}
415+
"""
416+
ev.copy(code = generatedCode, isNull = "false")
417+
} else {
418+
val generatedCode = s"""
419+
$eval1FuncName(${ctx.INPUT_ROW});
420+
boolean ${ev.isNull} = false;
421+
boolean ${ev.value} = true;
422+
if (!${eval1GlobalIsNull} && ${eval1GlobalValue}) {
423+
} else {
424+
$eval2FuncName(${ctx.INPUT_ROW});
425+
if (!${eval2GlobalIsNull} && ${eval2GlobalValue}) {
426+
} else if (!${eval1GlobalIsNull} && !${eval2GlobalIsNull}) {
427+
${ev.value} = false;
428+
} else {
429+
${ev.isNull} = true;
430+
}
431+
}
432+
"""
433+
ev.copy(code = generatedCode)
434+
}
435+
} else if (!left.nullable && !right.nullable) {
358436
ev.isNull = "false"
359437
ev.copy(code = s"""
360438
${eval1.code}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,4 +311,43 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
311311
test("SPARK-17160: field names are properly escaped by AssertTrue") {
312312
GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)) :: Nil)
313313
}
314+
315+
test("SPARK-21720: split large predications into blocks due to JVM code size limit") {
316+
val length = 600
317+
318+
val input = new GenericInternalRow(length)
319+
val utf8Str = UTF8String.fromString(s"abc")
320+
for (i <- 0 until length) {
321+
input.update(i, utf8Str)
322+
}
323+
324+
var exprOr: Expression = Literal(false)
325+
for (i <- 0 until length) {
326+
exprOr = Or(EqualTo(BoundReference(i, StringType, true), Literal(s"c$i")), exprOr)
327+
}
328+
329+
val planOr = GenerateMutableProjection.generate(Seq(exprOr))
330+
val actualOr = planOr(input).toSeq(Seq(exprOr.dataType))
331+
assert(actualOr.length == 1)
332+
val expectedOr = false
333+
334+
if (!checkResult(actualOr.head, expectedOr)) {
335+
fail(s"Incorrect Evaluation: expressions: $exprOr, actual: $actualOr, expected: $expectedOr")
336+
}
337+
338+
var exprAnd: Expression = Literal(true)
339+
for (i <- 0 until length) {
340+
exprAnd = And(EqualTo(BoundReference(i, StringType, true), Literal(s"c$i")), exprAnd)
341+
}
342+
343+
val planAnd = GenerateMutableProjection.generate(Seq(exprAnd))
344+
val actualAnd = planAnd(input).toSeq(Seq(exprAnd.dataType))
345+
assert(actualAnd.length == 1)
346+
val expectedAnd = false
347+
348+
if (!checkResult(actualAnd.head, expectedAnd)) {
349+
fail(
350+
s"Incorrect Evaluation: expressions: $exprAnd, actual: $actualAnd, expected: $expectedAnd")
351+
}
352+
}
314353
}

0 commit comments

Comments
 (0)