diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index f9c5ef8439085..f483db09d69f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -279,11 +279,11 @@ class CodegenContext {
       inlineToOuterClass: Boolean = false): String = {
     // The number of named constants that can exist in the class is limited by the Constant Pool
     // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
-    // threshold of 1600k bytes to determine when a function should be inlined to a private, nested
-    // sub-class.
+    // threshold to determine when a function should be inlined to a private, nested sub-class.
+    val generatedClassLengthThreshold = SQLConf.get.generatedClassLengthThreshold
     val (className, classInstance) = if (inlineToOuterClass) {
       outerClassName -> ""
-    } else if (currClassSize > 1600000) {
+    } else if (currClassSize > generatedClassLengthThreshold) {
       val className = freshName("NestedClass")
       val classInstance = freshName("nestedClassInstance")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 58323740b80cc..b3851fc78dc11 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -934,6 +934,18 @@ object SQLConf {
       .intConf
       .createWithDefault(10000)
 
+  val GENERATED_CLASS_LENGTH_THRESHOLD =
+    buildConf("spark.sql.codegen.generatedClass.size.threshold")
+      .internal()
+      .doc("Threshold in bytes for the size of a generated class. If the generated class " +
+        "size is higher than this value, a private nested class is created and used. " +
+        "This is useful to limit the number of named constants in the class " +
+        "and therefore its Constant Pool. The default is 1600k.")
+      .intConf
+      .checkValue(bytes => bytes > 0, "The maximum size of a generated class " +
+        "in bytes must be a positive number.")
+      .createWithDefault(1600000)
+
   object Deprecated {
     val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }
@@ -1214,6 +1226,8 @@ class SQLConf extends Serializable with Logging {
 
   def arrowMaxRecordsPerBatch: Int = getConf(ARROW_EXECUTION_MAX_RECORDS_PER_BATCH)
 
+  def generatedClassLengthThreshold: Int = getConf(GENERATED_CLASS_LENGTH_THRESHOLD)
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */
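
For illustration only (not part of the patch): a minimal sketch of how the new internal key `spark.sql.codegen.generatedClass.size.threshold` introduced above could be tuned when building a session. The key and its meaning come from the diff; the chosen value (800000) and the example job are assumptions, shown only to demonstrate overriding the 1600000-byte default.

```scala
import org.apache.spark.sql.SparkSession

object GeneratedClassThresholdExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("generated-class-threshold-example")
      .master("local[*]")
      // Lower the threshold so generated functions are moved into private
      // nested classes earlier, keeping each class's Constant Pool smaller.
      // 800000 is an arbitrary example value; the default stays 1600000.
      .config("spark.sql.codegen.generatedClass.size.threshold", "800000")
      .getOrCreate()

    // Any codegen-backed query exercises the setting; this one is just a placeholder.
    spark.range(0, 1000).selectExpr("id", "id * 2 AS doubled").show(5)
    spark.stop()
  }
}
```

Since the config is marked `.internal()`, it is aimed at debugging and tuning rather than end-user documentation, and the `checkValue` guard rejects non-positive byte sizes.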