From 4ac090ae444f98d89d89931d251cad061247eea8 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 16 Aug 2017 20:35:05 +0100 Subject: [PATCH 1/9] initial commit --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index f9c5ef8439085..2cb66599076a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -772,16 +772,19 @@ class CodegenContext { foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() + var length = 0 for (code <- expressions) { // We can't know how many bytecode will be generated, so use the length of source code // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should // also not be too small, or it will have many function calls (for wide table), see the // results in BenchmarkWideTable. 
- if (blockBuilder.length > 1024) { + if (length > 1024) { blocks += blockBuilder.toString() blockBuilder.clear() + length = 0 } blockBuilder.append(code) + length += CodeFormatter.stripExtraNewLinesAndComments(code).length } blocks += blockBuilder.toString() From d96f8e52c596acb2cc56d3830386689ac3924a15 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 17 Aug 2017 08:25:25 +0100 Subject: [PATCH 2/9] make threshold configurable --- .../catalyst/expressions/codegen/CodeGenerator.scala | 3 ++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 2cb66599076a9..4758226b9dd24 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -772,13 +772,14 @@ class CodegenContext { foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() + val maxCharacters = SQLConf.get.maxCharsPerFunction var length = 0 for (code <- expressions) { // We can't know how many bytecode will be generated, so use the length of source code // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should // also not be too small, or it will have many function calls (for wide table), see the // results in BenchmarkWideTable. 
- if (length > 1024) { + if (length > maxCharacters) { blocks += blockBuilder.toString() blockBuilder.clear() length = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 58323740b80cc..82a7c506076b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -586,6 +586,15 @@ object SQLConf { .intConf .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT) + val CODEGEN_MAX_CHARS_PER_FUNCTION = buildConf("spark.sql.codegen.maxCharactersPerFunction") + .internal() + .doc("The maximum characters of a single Java function generated by codegen. " + + "When the generated function exceeds this threshold, the multiple statements, " + + "whose characters are less than the value, are splited into a function. " + + "The default value 1024 is the max length of byte code JIT supported.") + .intConf + .createWithDefault(1024) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1061,6 +1070,8 @@ class SQLConf extends Serializable with Logging { def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT) + def maxCharsPerFunction: Int = getConf(CODEGEN_MAX_CHARS_PER_FUNCTION) + def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) From 87578dbbd3a497a5962a9c04bd8843e977ccc0fb Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 24 Aug 2017 19:40:12 +0100 Subject: [PATCH 3/9] use lines per method as split threshold instead of chars per method --- .../expressions/codegen/CodeGenerator.scala | 13 +++++++------ .../org/apache/spark/sql/internal/SQLConf.scala | 12 ++++++------ .../execution/benchmark/BenchmarkWideTable.scala | 16 ++++++++++------ 3 files changed, 23 insertions(+), 
18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 4758226b9dd24..d400c929ad391 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -772,20 +772,21 @@ class CodegenContext { foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() - val maxCharacters = SQLConf.get.maxCharsPerFunction - var length = 0 + val maxLines = SQLConf.get.maxCodegenLinesPerFunction + var line = 0 for (code <- expressions) { - // We can't know how many bytecode will be generated, so use the length of source code + // We can't know how many bytecode will be generated, so use the line of source code // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should // also not be too small, or it will have many function calls (for wide table), see the // results in BenchmarkWideTable. 
- if (length > maxCharacters) { + if (line > maxLines) { blocks += blockBuilder.toString() blockBuilder.clear() - length = 0 + line = 0 } blockBuilder.append(code) - length += CodeFormatter.stripExtraNewLinesAndComments(code).length + val lineOfCode = CodeFormatter.stripExtraNewLinesAndComments(code).count(_ == '\n') + line += (if (lineOfCode == 0) 1 else lineOfCode) } blocks += blockBuilder.toString() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 82a7c506076b3..fc988ad6dd8cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -586,14 +586,14 @@ object SQLConf { .intConf .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT) - val CODEGEN_MAX_CHARS_PER_FUNCTION = buildConf("spark.sql.codegen.maxCharactersPerFunction") + val CODEGEN_MAX_LINES_PER_FUNCTION = buildConf("spark.sql.codegen.maxCodegenLinesPerFunction") .internal() - .doc("The maximum characters of a single Java function generated by codegen. " + + .doc("The maximum lines of a single Java function generated by codegen. " + "When the generated function exceeds this threshold, the multiple statements, " + - "whose characters are less than the value, are splited into a function. " + - "The default value 1024 is the max length of byte code JIT supported.") + "whose lines are less than the value, are splited into a function. 
" + + "The default value 100 is the max length of byte code JIT supported.") .intConf - .createWithDefault(1024) + .createWithDefault(100) val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") @@ -1070,7 +1070,7 @@ class SQLConf extends Serializable with Logging { def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT) - def maxCharsPerFunction: Int = getConf(CODEGEN_MAX_CHARS_PER_FUNCTION) + def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNCTION) def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala index 9dcaca0ca93ee..f77b9c2a487f9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala @@ -42,12 +42,16 @@ class BenchmarkWideTable extends BenchmarkBase { /** * Here are some numbers with different split threshold: * - * Split threshold methods Rate(M/s) Per Row(ns) - * 10 400 0.4 2279 - * 100 200 0.6 1554 - * 1k 37 0.9 1116 - * 8k 5 0.5 2025 - * 64k 1 0.0 21649 + * Split threshold Rate(M/s) Per Row(ns) + * 10 0.5 2131.3 + * 20 0.5 2073.7 + * 40 0.5 2085.2 + * 64 0.5 2012.2 + * 80 0.5 2112.2 + * 100 0.5 1984.0 + * 128 0.5 2097.9 + * 256 0.5 2038.9 + * 1024 0.5 2045.2 */ } } From 073e9e5bd4ec93f3b7f03fe025088d1d0b778111 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 25 Aug 2017 01:57:12 +0100 Subject: [PATCH 4/9] update benchmark results --- .../execution/benchmark/BenchmarkWideTable.scala | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala index f77b9c2a487f9..293516cc643ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala @@ -43,15 +43,11 @@ class BenchmarkWideTable extends BenchmarkBase { * Here are some numbers with different split threshold: * * Split threshold Rate(M/s) Per Row(ns) - * 10 0.5 2131.3 - * 20 0.5 2073.7 - * 40 0.5 2085.2 - * 64 0.5 2012.2 - * 80 0.5 2112.2 - * 100 0.5 1984.0 - * 128 0.5 2097.9 - * 256 0.5 2038.9 - * 1024 0.5 2045.2 + * 10 1.4 724.3 + * 80 1.5 682.6 + * 100 1.7 599.1 + * 128 1.5 678.8 + * 1024 0.7 1372.1 */ } } From 63377a61e156bf8a6480d7a5c5c13118e456baba Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 26 Aug 2017 17:42:08 +0100 Subject: [PATCH 5/9] make a new option effective at runtime --- .../catalyst/expressions/codegen/CodeGenerator.scala | 8 +++++++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 ----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d400c929ad391..1d6c7f242e9a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -772,7 +772,13 @@ class CodegenContext { foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() - val maxLines = SQLConf.get.maxCodegenLinesPerFunction + val defaultMaxLines = 100 + val maxLines = if 
(SparkEnv.get != null) { + SparkEnv.get.conf.getInt("spark.sql.codegen.expressions.maxCodegenLinesPerFunction", + defaultMaxLines) + } else { + defaultMaxLines + } var line = 0 for (code <- expressions) { // We can't know how many bytecode will be generated, so use the line of source code diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fc988ad6dd8cd..58323740b80cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -586,15 +586,6 @@ object SQLConf { .intConf .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT) - val CODEGEN_MAX_LINES_PER_FUNCTION = buildConf("spark.sql.codegen.maxCodegenLinesPerFunction") - .internal() - .doc("The maximum lines of a single Java function generated by codegen. " + - "When the generated function exceeds this threshold, the multiple statements, " + - "whose lines are less than the value, are splited into a function. 
" + - "The default value 100 is the max length of byte code JIT supported.") - .intConf - .createWithDefault(100) - val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1070,8 +1061,6 @@ class SQLConf extends Serializable with Logging { def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT) - def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNCTION) - def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) From a489938b3f128558df31c97a32e196620c9fd475 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 4 Oct 2017 19:31:52 +0100 Subject: [PATCH 6/9] rebase with master --- .../sql/catalyst/expressions/codegen/CodeFormatter.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 60e600d8dbd8f..7b398f424cead 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -89,6 +89,14 @@ object CodeFormatter { } new CodeAndComment(code.result().trim(), map) } + + def stripExtraNewLinesAndComments(input: String): String = { + val commentReg = + ("""([ |\t]*?\/\*[\s|\S]*?\*\/[ |\t]*?)|""" + // strip /*comment*/ + """([ |\t]*?\/\/[\s\S]*?\n)""").r // strip //comment + val codeWithoutComment = commentReg.replaceAllIn(input, "") + codeWithoutComment.replaceAll("""\n\s*\n""", "\n") // strip ExtraNewLines + } } private class CodeFormatter { From b04c09c3683f104909713344c90e46b4129f5401 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 6 Oct 2017 17:25:44 +0100 Subject: [PATCH 7/9] avoid using SparkEnv.get --- 
.../catalyst/expressions/codegen/CodeGenerator.scala | 8 +------- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1d6c7f242e9a2..d400c929ad391 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -772,13 +772,7 @@ class CodegenContext { foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() - val defaultMaxLines = 100 - val maxLines = if (SparkEnv.get != null) { - SparkEnv.get.conf.getInt("spark.sql.codegen.expressions.maxCodegenLinesPerFunction", - defaultMaxLines) - } else { - defaultMaxLines - } + val maxLines = SQLConf.get.maxCodegenLinesPerFunction var line = 0 for (code <- expressions) { // We can't know how many bytecode will be generated, so use the line of source code diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 58323740b80cc..ee94cdd0971e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -586,6 +586,15 @@ object SQLConf { .intConf .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT) + val CODEGEN_MAX_LINES_PER_FUNC = + buildConf("spark.sql.codegen.expressions.maxCodegenLinesPerFunction") + .internal() + .doc("The maximum line number of a single Java function splited by " + + "CodeGenerator.splitExpression() method. 
This threshold is defined to reduce " + + "possibility of exceeding 64KB of Java bytecode size of the splitted method.") + .intConf + .createWithDefault(100) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1061,6 +1070,8 @@ class SQLConf extends Serializable with Logging { def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT) + def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNC) + def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) From 4c4780207afcc2d55d19bf8d3e9fc29812f07ae8 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 10 Oct 2017 18:58:24 +0100 Subject: [PATCH 8/9] use the original threshold against Java code excluding comments --- .../catalyst/expressions/codegen/CodeGenerator.scala | 12 +++++------- .../org/apache/spark/sql/internal/SQLConf.scala | 11 ----------- .../sql/execution/benchmark/BenchmarkWideTable.scala | 12 ++++++------ 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d400c929ad391..2cb66599076a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -772,21 +772,19 @@ class CodegenContext { foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() - val maxLines = SQLConf.get.maxCodegenLinesPerFunction - var line = 0 + var length = 0 for (code <- expressions) { - // We can't know how many bytecode will be generated, so use the line of source code + // We 
can't know how many bytecode will be generated, so use the length of source code // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should // also not be too small, or it will have many function calls (for wide table), see the // results in BenchmarkWideTable. - if (line > maxLines) { + if (length > 1024) { blocks += blockBuilder.toString() blockBuilder.clear() - line = 0 + length = 0 } blockBuilder.append(code) - val lineOfCode = CodeFormatter.stripExtraNewLinesAndComments(code).count(_ == '\n') - line += (if (lineOfCode == 0) 1 else lineOfCode) + length += CodeFormatter.stripExtraNewLinesAndComments(code).length } blocks += blockBuilder.toString() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ee94cdd0971e3..58323740b80cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -586,15 +586,6 @@ object SQLConf { .intConf .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT) - val CODEGEN_MAX_LINES_PER_FUNC = - buildConf("spark.sql.codegen.expressions.maxCodegenLinesPerFunction") - .internal() - .doc("The maximum line number of a single Java function splited by " + - "CodeGenerator.splitExpression() method. 
This threshold is defined to reduce " + - "possibility of exceeding 64KB of Java bytecode size of the splitted method.") - .intConf - .createWithDefault(100) - val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1070,8 +1061,6 @@ class SQLConf extends Serializable with Logging { def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT) - def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNC) - def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala index 293516cc643ef..9dcaca0ca93ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala @@ -42,12 +42,12 @@ class BenchmarkWideTable extends BenchmarkBase { /** * Here are some numbers with different split threshold: * - * Split threshold Rate(M/s) Per Row(ns) - * 10 1.4 724.3 - * 80 1.5 682.6 - * 100 1.7 599.1 - * 128 1.5 678.8 - * 1024 0.7 1372.1 + * Split threshold methods Rate(M/s) Per Row(ns) + * 10 400 0.4 2279 + * 100 200 0.6 1554 + * 1k 37 0.9 1116 + * 8k 5 0.5 2025 + * 64k 1 0.0 21649 */ } } From 516a72a62cb579f2952c4b776afec0dc1826e590 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 10 Oct 2017 19:37:27 +0100 Subject: [PATCH 9/9] revert test case --- .../codegen/CodeFormatterSuite.scala | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 
9d0a41661beaa..a0f1a64b0ab08 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -53,6 +53,38 @@ class CodeFormatterSuite extends SparkFunSuite { assert(reducedCode.body === "/*project_c4*/") } + test("removing extra new lines and comments") { + val code = + """ + |/* + | * multi + | * line + | * comments + | */ + | + |public function() { + |/*comment*/ + | /*comment_with_space*/ + |code_body + |//comment + |code_body + | //comment_with_space + | + |code_body + |} + """.stripMargin + + val reducedCode = CodeFormatter.stripExtraNewLinesAndComments(code) + assert(reducedCode === + """ + |public function() { + |code_body + |code_body + |code_body + |} + """.stripMargin) + } + testCase("basic example") { """ |class A {