From d72c9d052014031a8bd7672bf95c07f01fd44bc6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 13 May 2021 14:04:08 +0800 Subject: [PATCH 1/5] Simplify the way to get classes from ClassBodyEvaluator --- .../catalyst/expressions/codegen/CodeGenerator.scala | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index f1fc718432c5..10d5d968462c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1434,15 +1434,7 @@ object CodeGenerator extends Logging { */ private def updateAndGetCompilationStats(evaluator: ClassBodyEvaluator): ByteCodeStats = { // First retrieve the generated classes. - val classes = { - val scField = classOf[ClassBodyEvaluator].getDeclaredField("sc") - scField.setAccessible(true) - val compiler = scField.get(evaluator).asInstanceOf[SimpleCompiler] - val loader = compiler.getClassLoader.asInstanceOf[ByteArrayClassLoader] - val classesField = loader.getClass.getDeclaredField("classes") - classesField.setAccessible(true) - classesField.get(loader).asInstanceOf[JavaMap[String, Array[Byte]]].asScala - } + val classes = evaluator.getBytecodes.asScala // Then walk the classes to get at the method bytecode. val codeAttr = Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute") From 738bdc6d3459c30399ac4efec6e4bef799611ddd Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 13 May 2021 14:14:23 +0800 Subject: [PATCH 2/5] remove unused imports --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 10d5d968462c..f8c63960dd6d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.expressions.codegen import java.io.ByteArrayInputStream -import java.util.{Map => JavaMap} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -28,8 +27,7 @@ import scala.util.control.NonFatal import com.google.common.cache.{CacheBuilder, CacheLoader} import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException} import org.codehaus.commons.compiler.{CompileException, InternalCompilerException} -import org.codehaus.commons.compiler.util.reflect.ByteArrayClassLoader -import org.codehaus.janino.{ClassBodyEvaluator, SimpleCompiler} +import org.codehaus.janino.ClassBodyEvaluator import org.codehaus.janino.util.ClassFile import org.apache.spark.{TaskContext, TaskKilledException} From 5ddb354f2f7b90430539dcca0adaaee7083d47af Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 17 May 2021 13:20:28 +0800 Subject: [PATCH 3/5] add a new test --- .../expressions/CodeGenerationSuite.scala | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 026d9676f4fb..c0760c91dae2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -68,6 +68,24 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount() > startCount4) } + test("SPARK-35398: verify the metrics state doesn't change between " + + "using reflection api and using ClassBodyEvaluator.getBytecodes api in " + + "updateAndGetCompilationStats method.") { + val metricGeneratedClassByteCodeSizeSnapshot = + CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot.getValues + val metricGeneratedMethodByteCodeSizeSnapshot = + CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot.getValues + GenerateOrdering.generate(Add(Literal(123), Literal(1)).asc :: Nil) + // The state of metrics may change when using different versions of janino or + // the operator implementation changes. + assert(CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot.getValues + .diff(metricGeneratedClassByteCodeSizeSnapshot) + .sameElements(Array(740, 1293))) + assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot.getValues + .diff(metricGeneratedMethodByteCodeSizeSnapshot) + .sameElements(Array(5, 5, 6, 7, 10, 15, 46))) + } + test("SPARK-8443: split wide projections into blocks due to JVM code size limit") { val length = 5000 val expressions = List.fill(length)(EqualTo(Literal(1), Literal(1))) From c1deabe18e6cb34199fb9fb0bf171020c61168f0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 17 May 2021 14:09:04 +0800 Subject: [PATCH 4/5] fix test --- .../spark/sql/catalyst/expressions/CodeGenerationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index c0760c91dae2..1d5c98b6a218 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -75,7 +75,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot.getValues val metricGeneratedMethodByteCodeSizeSnapshot = CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot.getValues - GenerateOrdering.generate(Add(Literal(123), Literal(1)).asc :: Nil) + GenerateOrdering.generate(Subtract(Literal(123), Literal(1)).asc :: Nil) // The state of metrics may change when using different versions of janino or // the operator implementation changes. assert(CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot.getValues From 527e10265d1f095bff11d539d752a6bf0fc33e7c Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 18 May 2021 19:34:08 +0800 Subject: [PATCH 5/5] revert test case --- .../expressions/CodeGenerationSuite.scala | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 1d5c98b6a218..026d9676f4fb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -68,24 +68,6 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount() > startCount4) } - test("SPARK-35398: verify the metrics state doesn't change between " + - "using reflection api and using ClassBodyEvaluator.getBytecodes api in " + - "updateAndGetCompilationStats method.") { - val metricGeneratedClassByteCodeSizeSnapshot = - CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot.getValues - val metricGeneratedMethodByteCodeSizeSnapshot = - CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot.getValues - GenerateOrdering.generate(Subtract(Literal(123), Literal(1)).asc :: Nil) - // The state of metrics may change when using different versions of janino or - // the operator implementation changes. - assert(CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot.getValues - .diff(metricGeneratedClassByteCodeSizeSnapshot) - .sameElements(Array(740, 1293))) - assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot.getValues - .diff(metricGeneratedMethodByteCodeSizeSnapshot) - .sameElements(Array(5, 5, 6, 7, 10, 15, 46))) - } - test("SPARK-8443: split wide projections into blocks due to JVM code size limit") { val length = 5000 val expressions = List.fill(length)(EqualTo(Literal(1), Literal(1)))