Skip to content

Commit 06b0cee

Browse files
lw-lincmonkey
authored andcommitted
[SPARK-16845][SQL] GeneratedClass$SpecificOrdering grows beyond 64 KB
## What changes were proposed in this pull request? Prior to this patch, we'll generate `compare(...)` for `GeneratedClass$SpecificOrdering` like below, leading to Janino exceptions saying the code grows beyond 64 KB. ``` scala /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* ..... */ ... /* 10969 */ private int compare(InternalRow a, InternalRow b) { /* 10970 */ InternalRow i = null; // Holds current row being evaluated. /* 10971 */ /* 1.... */ code for comparing field0 /* 1.... */ code for comparing field1 /* 1.... */ ... /* 1.... */ code for comparing field449 /* 15012 */ /* 15013 */ return 0; /* 15014 */ } /* 15015 */ } ``` This patch would break `compare(...)` into smaller `compare_xxx(...)` methods when necessary; then we'll get generated `compare(...)` like: ``` scala /* 001 */ public SpecificOrdering generate(Object[] references) { /* 002 */ return new SpecificOrdering(references); /* 003 */ } /* 004 */ /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* 006 */ /* 007 */ ... /* 1.... */ /* 11290 */ private int compare_0(InternalRow a, InternalRow b) { /* 11291 */ InternalRow i = null; // Holds current row being evaluated. /* 11292 */ /* 11293 */ i = a; /* 11294 */ boolean isNullA; /* 11295 */ UTF8String primitiveA; /* 11296 */ { /* 11297 */ /* 11298 */ Object obj = ((Expression) references[0]).eval(null); /* 11299 */ UTF8String value = (UTF8String) obj; /* 11300 */ isNullA = false; /* 11301 */ primitiveA = value; /* 11302 */ } /* 11303 */ i = b; /* 11304 */ boolean isNullB; /* 11305 */ UTF8String primitiveB; /* 11306 */ { /* 11307 */ /* 11308 */ Object obj = ((Expression) references[0]).eval(null); /* 11309 */ UTF8String value = (UTF8String) obj; /* 11310 */ isNullB = false; /* 11311 */ primitiveB = value; /* 11312 */ } /* 11313 */ if (isNullA && isNullB) { /* 11314 */ // Nothing /* 11315 */ } else if (isNullA) { /* 11316 */ return -1; /* 11317 */ } else if (isNullB) { /* 11318 */ return 1; /* 11319 */ } else { /* 11320 */ int comp = primitiveA.compare(primitiveB); /* 11321 */ if (comp != 0) { /* 11322 */ return comp; /* 11323 */ } /* 11324 */ } /* 11325 */ /* 11326 */ /* 11327 */ i = a; /* 11328 */ boolean isNullA1; /* 11329 */ UTF8String primitiveA1; /* 11330 */ { /* 11331 */ /* 11332 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11333 */ UTF8String value1 = (UTF8String) obj1; /* 11334 */ isNullA1 = false; /* 11335 */ primitiveA1 = value1; /* 11336 */ } /* 11337 */ i = b; /* 11338 */ boolean isNullB1; /* 11339 */ UTF8String primitiveB1; /* 11340 */ { /* 11341 */ /* 11342 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11343 */ UTF8String value1 = (UTF8String) obj1; /* 11344 */ isNullB1 = false; /* 11345 */ primitiveB1 = value1; /* 11346 */ } /* 11347 */ if (isNullA1 && isNullB1) { /* 11348 */ // Nothing /* 11349 */ } else if (isNullA1) { /* 11350 */ return -1; /* 11351 */ } else if (isNullB1) { /* 11352 */ return 1; /* 11353 */ } else { /* 11354 */ int comp = primitiveA1.compare(primitiveB1); /* 11355 */ if (comp != 0) { /* 11356 */ return comp; /* 11357 */ } /* 11358 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 12652 */ return 0; /* 12653 */ } /* 1.... */ /* 1.... */ ... /* 15387 */ /* 15388 */ public int compare(InternalRow a, InternalRow b) { /* 15389 */ /* 15390 */ int comp_0 = compare_0(a, b); /* 15391 */ if (comp_0 != 0) { /* 15392 */ return comp_0; /* 15393 */ } /* 15394 */ /* 15395 */ int comp_1 = compare_1(a, b); /* 15396 */ if (comp_1 != 0) { /* 15397 */ return comp_1; /* 15398 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 15450 */ return 0; /* 15451 */ } /* 15452 */ } ``` ## How was this patch tested? - a new added test case which - would fail prior to this patch - would pass with this patch - ordering correctness should already be covered by existing tests like those in `OrderingSuite` ## Acknowledgement A major part of this PR - the refactoring work of `splitExpression()` - has been done by ueshin. Author: Liwei Lin <[email protected]> Author: Takuya UESHIN <[email protected]> Author: Takuya Ueshin <[email protected]> Closes apache#15480 from lw-lin/spec-ordering-64k-.
1 parent d826d2f commit 06b0cee

File tree

3 files changed

+57
-7
lines changed

3 files changed

+57
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -640,8 +640,24 @@ class CodegenContext {
640640
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
641641
}
642642

643-
private def splitExpressions(
644-
expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = {
643+
/**
644+
* Splits the generated code of expressions into multiple functions, because function has
645+
* 64kb code size limit in JVM
646+
*
647+
* @param expressions the codes to evaluate expressions.
648+
* @param funcName the split function name base.
649+
* @param arguments the list of (type, name) of the arguments of the split function.
650+
* @param returnType the return type of the split function.
651+
* @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
652+
* @param foldFunctions folds the split function calls.
653+
*/
654+
def splitExpressions(
655+
expressions: Seq[String],
656+
funcName: String,
657+
arguments: Seq[(String, String)],
658+
returnType: String = "void",
659+
makeSplitFunction: String => String = identity,
660+
foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
645661
val blocks = new ArrayBuffer[String]()
646662
val blockBuilder = new StringBuilder()
647663
for (code <- expressions) {
@@ -662,18 +678,19 @@ class CodegenContext {
662678
blocks.head
663679
} else {
664680
val func = freshName(funcName)
681+
val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
665682
val functions = blocks.zipWithIndex.map { case (body, i) =>
666683
val name = s"${func}_$i"
667684
val code = s"""
668-
|private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) {
669-
| $body
685+
|private $returnType $name($argString) {
686+
| ${makeSplitFunction(body)}
670687
|}
671688
""".stripMargin
672689
addNewFunction(name, code)
673690
name
674691
}
675692

676-
functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n")
693+
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
677694
}
678695
}
679696

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
117117
}
118118
}
119119
"""
120-
}.mkString("\n")
121-
comparisons
120+
}
121+
122+
ctx.splitExpressions(
123+
expressions = comparisons,
124+
funcName = "compare",
125+
arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
126+
returnType = "int",
127+
makeSplitFunction = { body =>
128+
s"""
129+
InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
130+
$body
131+
return 0;
132+
"""
133+
},
134+
foldFunctions = { funCalls =>
135+
funCalls.zipWithIndex.map { case (funCall, i) =>
136+
val comp = ctx.freshName("comp")
137+
s"""
138+
int $comp = $funCall;
139+
if ($comp != 0) {
140+
return $comp;
141+
}
142+
"""
143+
}.mkString
144+
})
122145
}
123146

124147
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
127127
}
128128
}
129129
}
130+
131+
test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
132+
val sortOrder = Literal("abc").asc
133+
134+
// this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845
135+
GenerateOrdering.generate(Array.fill(40)(sortOrder))
136+
137+
// verify that we can support up to 5000 ordering comparisons, which should be sufficient
138+
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
139+
}
130140
}

0 commit comments

Comments
 (0)