
Commit bda6ac2

Merge branch 'master' into plan-to-file
2 parents: f7de26d + e017cb3

File tree

3 files changed: +19 −2 lines


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala

Lines changed: 3 additions & 1 deletion
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.trees.TreeNode
 import org.apache.spark.sql.catalyst.util.truncatedString
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -121,7 +122,8 @@ abstract class Expression extends TreeNode[Expression] {
 
   private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = {
     // TODO: support whole stage codegen too
-    if (eval.code.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) {
+    val splitThreshold = SQLConf.get.methodSplitThreshold
+    if (eval.code.length > splitThreshold && ctx.INPUT_ROW != null && ctx.currentVars == null) {
       val setIsNull = if (!eval.isNull.isInstanceOf[LiteralValue]) {
         val globalIsNull = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "globalIsNull")
         val localIsNull = eval.isNull

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 2 additions & 1 deletion
@@ -910,12 +910,13 @@ class CodegenContext {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
     var length = 0
+    val splitThreshold = SQLConf.get.methodSplitThreshold
     for (code <- expressions) {
       // We can't know how many bytecode will be generated, so use the length of source code
       // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should
       // also not be too small, or it will have many function calls (for wide table), see the
       // results in BenchmarkWideTable.
-      if (length > 1024) {
+      if (length > splitThreshold) {
         blocks += blockBuilder.toString()
         blockBuilder.clear()
         length = 0
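
To illustrate the splitting loop above in isolation, here is a minimal, self-contained sketch. It is not Spark's actual splitExpressions implementation; the names SplitSketch and splitByLength are made up for illustration, and raw character count stands in for Spark's comment-stripped length metric.

import scala.collection.mutable.ArrayBuffer

object SplitSketch {
  // Group code snippets into blocks, starting a new block once the accumulated
  // source length exceeds splitThreshold (the role played by
  // spark.sql.codegen.methodSplitThreshold in the diff above).
  def splitByLength(expressions: Seq[String], splitThreshold: Int): Seq[String] = {
    val blocks = new ArrayBuffer[String]()
    val blockBuilder = new StringBuilder()
    var length = 0
    for (code <- expressions) {
      // Bytecode size cannot be known ahead of time, so source length is the proxy.
      if (length > splitThreshold) {
        blocks += blockBuilder.toString()
        blockBuilder.clear()
        length = 0
      }
      blockBuilder.append(code)
      length += code.length
    }
    blocks += blockBuilder.toString()
    blocks.toSeq
  }

  def main(args: Array[String]): Unit = {
    val snippets = Seq.tabulate(10)(i => s"int v$i = in.getInt($i); out.setInt($i, v$i + 1);\n")
    // With a 100-character threshold, the ten snippets end up spread over several blocks.
    splitByLength(snippets, splitThreshold = 100).foreach(b => println(s"block of ${b.length} chars"))
  }
}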

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 14 additions & 0 deletions
@@ -818,6 +818,18 @@ object SQLConf {
     .intConf
     .createWithDefault(65535)
 
+  val CODEGEN_METHOD_SPLIT_THRESHOLD = buildConf("spark.sql.codegen.methodSplitThreshold")
+    .internal()
+    .doc("The threshold of source-code splitting in the codegen. When the number of characters " +
+      "in a single Java function (without comments) exceeds the threshold, the function will " +
+      "be automatically split into multiple smaller ones. We cannot know how much bytecode " +
+      "will be generated, so we use the source-code length as the metric. When running on " +
+      "HotSpot, a function's bytecode should not go beyond 8KB, otherwise it will not be " +
+      "JITted; it should also not be too small, otherwise there will be many function calls.")
+    .intConf
+    .checkValue(threshold => threshold > 0, "The threshold must be a positive integer.")
+    .createWithDefault(1024)
+
   val WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR =
     buildConf("spark.sql.codegen.splitConsumeFuncByOperator")
       .internal()
@@ -1747,6 +1759,8 @@ class SQLConf extends Serializable with Logging {
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
 
+  def methodSplitThreshold: Int = getConf(CODEGEN_METHOD_SPLIT_THRESHOLD)
+
   def wholeStageSplitConsumeFuncByOperator: Boolean =
     getConf(WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR)
 
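
The new entry is marked internal, so it does not appear in the public configuration docs, but like any SQL conf it can still be overridden. A minimal sketch of setting it when building a session follows; the local-mode session and the sample query are only for illustration.

import org.apache.spark.sql.SparkSession

object MethodSplitThresholdDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("method-split-threshold-demo")
      // Lower the split threshold from the default 1024 characters so generated
      // Java functions are split into smaller methods sooner.
      .config("spark.sql.codegen.methodSplitThreshold", "512")
      .getOrCreate()

    // Any codegen-heavy query now splits its generated methods at the new threshold.
    spark.range(0, 1000).selectExpr("id + 1 AS a", "id * 2 AS b").show(5)

    spark.stop()
  }
}

Because it is a per-session SQL conf, it can also be changed at runtime with spark.conf.set("spark.sql.codegen.methodSplitThreshold", "512").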
