From c69be31314d9aa96c3920073beaf7cca46d507fa Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Fri, 6 Oct 2017 14:58:36 +0200
Subject: [PATCH 1/3] [SPARK-22215][SQL] Add configuration to set the threshold for generated class

CodegenContext compared currClassSize against a hard-coded 1600000 when
deciding whether a new function goes into a private nested class. Read
the limit from the new spark.sql.codegen.generatedClass.size.threshold
entry instead. Its default is 1600000, so the default behaviour is
unchanged.
---
Review note: the .doc() text below was corrected ("higher than", and the
missing space after "used."). The blob ids on the SQLConf.scala index
lines of this series were not regenerated after that wording fix.

 .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 8 +++++---
 .../scala/org/apache/spark/sql/internal/SQLConf.scala    | 9 +++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index f9c5ef8439085..7f91169d84d02 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -279,11 +279,13 @@ class CodegenContext {
       inlineToOuterClass: Boolean = false): String = {
     // The number of named constants that can exist in the class is limited by the Constant Pool
     // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
-    // threshold of 1600k bytes to determine when a function should be inlined to a private, nested
-    // sub-class.
+    // threshold to determine when a function should be inlined to a private, nested sub-class
+    val generatedClassLengthThreshold = SparkEnv.get.conf.getInt(
+      SQLConf.GENERATED_CLASS_LENGTH_THRESHOLD.key,
+      SQLConf.GENERATED_CLASS_LENGTH_THRESHOLD.defaultValue.get)
     val (className, classInstance) = if (inlineToOuterClass) {
       outerClassName -> ""
-    } else if (currClassSize > 1600000) {
+    } else if (currClassSize > generatedClassLengthThreshold) {
       val className = freshName("NestedClass")
       val classInstance = freshName("nestedClassInstance")
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 58323740b80cc..7fe80f95c6f4a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -934,6 +934,15 @@ object SQLConf {
       .intConf
       .createWithDefault(10000)
 
+  val GENERATED_CLASS_LENGTH_THRESHOLD =
+    buildConf("spark.sql.codegen.generatedClass.size.threshold")
+      .doc("Threshold in bytes for the size of a generated class. If the generated class " +
+        "size is higher than this value, a private nested class is created and used. " +
+        "This is useful to limit the number of named constants in the class " +
+        "and therefore its Constant Pool. The default is 1600k.")
+      .intConf
+      .createWithDefault(1600000)
+
   object Deprecated {
     val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }

From ed8aaead6cec0288f6019d4bf10f0798172abc35 Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Fri, 6 Oct 2017 17:24:09 +0200
Subject: [PATCH 2/3] use SQLConf instead of SparkEnv

Obtain the threshold through the new SQLConf.generatedClassLengthThreshold
accessor (via SQLConf.get) rather than looking the key up on
SparkEnv.get.conf.
---
 .../sql/catalyst/expressions/codegen/CodeGenerator.scala      | 4 +---
 .../main/scala/org/apache/spark/sql/internal/SQLConf.scala    | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 7f91169d84d02..f483db09d69f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -280,9 +280,7 @@ class CodegenContext {
     // The number of named constants that can exist in the class is limited by the Constant Pool
     // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
     // threshold to determine when a function should be inlined to a private, nested sub-class
-    val generatedClassLengthThreshold = SparkEnv.get.conf.getInt(
-      SQLConf.GENERATED_CLASS_LENGTH_THRESHOLD.key,
-      SQLConf.GENERATED_CLASS_LENGTH_THRESHOLD.defaultValue.get)
+    val generatedClassLengthThreshold = SQLConf.get.generatedClassLengthThreshold
     val (className, classInstance) = if (inlineToOuterClass) {
       outerClassName -> ""
     } else if (currClassSize > generatedClassLengthThreshold) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 7fe80f95c6f4a..2ee559c6dac27 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1223,6 +1223,8 @@ class SQLConf extends Serializable with Logging {
 
   def arrowMaxRecordsPerBatch: Int = getConf(ARROW_EXECUTION_MAX_RECORDS_PER_BATCH)
 
+  def generatedClassLengthThreshold: Int = getConf(GENERATED_CLASS_LENGTH_THRESHOLD)
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */

From 4c68de68fc9d5ed947f4d3db29fdb2fe0bb246ac Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Fri, 6 Oct 2017 18:08:24 +0200
Subject: [PATCH 3/3] add internal and check

Mark spark.sql.codegen.generatedClass.size.threshold as internal and add
a checkValue that rejects values that are not greater than zero.
---
Review note: the context lines below carry the corrected .doc() wording
introduced in patch 1/3 so that this patch still applies on top of it.

 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 2ee559c6dac27..b3851fc78dc11 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -936,11 +936,14 @@ object SQLConf {
 
   val GENERATED_CLASS_LENGTH_THRESHOLD =
     buildConf("spark.sql.codegen.generatedClass.size.threshold")
+      .internal()
       .doc("Threshold in bytes for the size of a generated class. If the generated class " +
         "size is higher than this value, a private nested class is created and used. " +
         "This is useful to limit the number of named constants in the class " +
         "and therefore its Constant Pool. The default is 1600k.")
       .intConf
+      .checkValue(bytes => bytes > 0, "The maximum size of a generated class " +
+        "in bytes must be a positive number.")
       .createWithDefault(1600000)
 
   object Deprecated {