From 4ac090ae444f98d89d89931d251cad061247eea8 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Wed, 16 Aug 2017 20:35:05 +0100
Subject: [PATCH 1/9] initial commit

---
 .../sql/catalyst/expressions/codegen/CodeGenerator.scala     | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index f9c5ef8439085..2cb66599076a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,16 +772,19 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
+    var length = 0
     for (code <- expressions) {
       // We can't know how many bytecode will be generated, so use the length of source code
       // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should
       // also not be too small, or it will have many function calls (for wide table), see the
       // results in BenchmarkWideTable.
-      if (blockBuilder.length > 1024) {
+      if (length > 1024) {
         blocks += blockBuilder.toString()
         blockBuilder.clear()
+        length = 0
       }
       blockBuilder.append(code)
+      length += CodeFormatter.stripExtraNewLinesAndComments(code).length
     }
     blocks += blockBuilder.toString()
 

From d96f8e52c596acb2cc56d3830386689ac3924a15 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Thu, 17 Aug 2017 08:25:25 +0100
Subject: [PATCH 2/9] make threshold configurable

---
 .../catalyst/expressions/codegen/CodeGenerator.scala  |  3 ++-
 .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 2cb66599076a9..4758226b9dd24 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,13 +772,14 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
+    val maxCharacters = SQLConf.get.maxCharsPerFunction
     var length = 0
     for (code <- expressions) {
       // We can't know how many bytecode will be generated, so use the length of source code
       // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should
       // also not be too small, or it will have many function calls (for wide table), see the
       // results in BenchmarkWideTable.
-      if (length > 1024) {
+      if (length > maxCharacters) {
         blocks += blockBuilder.toString()
         blockBuilder.clear()
         length = 0
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 58323740b80cc..82a7c506076b3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -586,6 +586,15 @@ object SQLConf {
     .intConf
     .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT)
 
+  val CODEGEN_MAX_CHARS_PER_FUNCTION = buildConf("spark.sql.codegen.maxCharactersPerFunction")
+    .internal()
+    .doc("The maximum characters of a single Java function generated by codegen. " +
+      "When the generated function exceeds this threshold, the multiple statements, " +
+      "whose characters are less than the value, are splited into a function. " +
+      "The default value 1024 is the max length of byte code JIT supported.")
+    .intConf
+    .createWithDefault(1024)
+
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
     .longConf
@@ -1061,6 +1070,8 @@ class SQLConf extends Serializable with Logging {
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
 
+  def maxCharsPerFunction: Int = getConf(CODEGEN_MAX_CHARS_PER_FUNCTION)
+
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
 

From 87578dbbd3a497a5962a9c04bd8843e977ccc0fb Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Thu, 24 Aug 2017 19:40:12 +0100
Subject: [PATCH 3/9] use lines per method as split threshold instead of chars
 per method

---
 .../expressions/codegen/CodeGenerator.scala      | 13 +++++++------
 .../org/apache/spark/sql/internal/SQLConf.scala  | 12 ++++++------
 .../execution/benchmark/BenchmarkWideTable.scala | 16 ++++++++++------
 3 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 4758226b9dd24..d400c929ad391 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,20 +772,21 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
-    val maxCharacters = SQLConf.get.maxCharsPerFunction
-    var length = 0
+    val maxLines = SQLConf.get.maxCodegenLinesPerFunction
+    var line = 0
     for (code <- expressions) {
-      // We can't know how many bytecode will be generated, so use the length of source code
+      // We can't know how many bytecode will be generated, so use the line of source code
       // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should
       // also not be too small, or it will have many function calls (for wide table), see the
       // results in BenchmarkWideTable.
-      if (length > maxCharacters) {
+      if (line > maxLines) {
         blocks += blockBuilder.toString()
         blockBuilder.clear()
-        length = 0
+        line = 0
       }
       blockBuilder.append(code)
-      length += CodeFormatter.stripExtraNewLinesAndComments(code).length
+      val lineOfCode = CodeFormatter.stripExtraNewLinesAndComments(code).count(_ == '\n')
+      line += (if (lineOfCode == 0) 1 else lineOfCode)
     }
     blocks += blockBuilder.toString()
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 82a7c506076b3..fc988ad6dd8cd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -586,14 +586,14 @@ object SQLConf {
     .intConf
     .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT)
 
-  val CODEGEN_MAX_CHARS_PER_FUNCTION = buildConf("spark.sql.codegen.maxCharactersPerFunction")
+  val CODEGEN_MAX_LINES_PER_FUNCTION = buildConf("spark.sql.codegen.maxCodegenLinesPerFunction")
     .internal()
-    .doc("The maximum characters of a single Java function generated by codegen. " +
+    .doc("The maximum lines of a single Java function generated by codegen. " +
       "When the generated function exceeds this threshold, the multiple statements, " +
-      "whose characters are less than the value, are splited into a function. " +
-      "The default value 1024 is the max length of byte code JIT supported.")
+      "whose lines are less than the value, are splited into a function. " +
+      "The default value 100 is the max length of byte code JIT supported.")
     .intConf
-    .createWithDefault(1024)
+    .createWithDefault(100)
 
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
@@ -1070,7 +1070,7 @@ class SQLConf extends Serializable with Logging {
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
 
-  def maxCharsPerFunction: Int = getConf(CODEGEN_MAX_CHARS_PER_FUNCTION)
+  def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNCTION)
 
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
index 9dcaca0ca93ee..f77b9c2a487f9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
@@ -42,12 +42,16 @@ class BenchmarkWideTable extends BenchmarkBase {
     /**
      * Here are some numbers with different split threshold:
      *
-     *  Split threshold      methods       Rate(M/s)   Per Row(ns)
-     *  10                   400           0.4         2279
-     *  100                  200           0.6         1554
-     *  1k                   37            0.9         1116
-     *  8k                   5             0.5         2025
-     *  64k                  1             0.0        21649
+     *  Split threshold      Rate(M/s)   Per Row(ns)
+     *  10                   0.5         2131.3
+     *  20                   0.5         2073.7
+     *  40                   0.5         2085.2
+     *  64                   0.5         2012.2
+     *  80                   0.5         2112.2
+     *  100                  0.5         1984.0
+     *  128                  0.5         2097.9
+     *  256                  0.5         2038.9
+     *  1024                 0.5         2045.2
      */
   }
 }

From 073e9e5bd4ec93f3b7f03fe025088d1d0b778111 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 25 Aug 2017 01:57:12 +0100
Subject: [PATCH 4/9] update benchmark results

---
 .../execution/benchmark/BenchmarkWideTable.scala   | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
index f77b9c2a487f9..293516cc643ef 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
@@ -43,15 +43,11 @@ class BenchmarkWideTable extends BenchmarkBase {
      * Here are some numbers with different split threshold:
      *
      *  Split threshold      Rate(M/s)   Per Row(ns)
-     *  10                   0.5         2131.3
-     *  20                   0.5         2073.7
-     *  40                   0.5         2085.2
-     *  64                   0.5         2012.2
-     *  80                   0.5         2112.2
-     *  100                  0.5         1984.0
-     *  128                  0.5         2097.9
-     *  256                  0.5         2038.9
-     *  1024                 0.5         2045.2
+     *  10                   1.4         724.3
+     *  80                   1.5         682.6
+     *  100                  1.7         599.1
+     *  128                  1.5         678.8
+     *  1024                 0.7         1372.1
      */
   }
 }

From 63377a61e156bf8a6480d7a5c5c13118e456baba Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sat, 26 Aug 2017 17:42:08 +0100
Subject: [PATCH 5/9] make a new option effective at runtime

---
 .../catalyst/expressions/codegen/CodeGenerator.scala  |  8 +++++++-
 .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 -----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d400c929ad391..1d6c7f242e9a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,7 +772,13 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
-    val maxLines = SQLConf.get.maxCodegenLinesPerFunction
+    val defaultMaxLines = 100
+    val maxLines = if (SparkEnv.get != null) {
+      SparkEnv.get.conf.getInt("spark.sql.codegen.expressions.maxCodegenLinesPerFunction",
+        defaultMaxLines)
+    } else {
+      defaultMaxLines
+    }
     var line = 0
     for (code <- expressions) {
       // We can't know how many bytecode will be generated, so use the line of source code
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index fc988ad6dd8cd..58323740b80cc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -586,15 +586,6 @@ object SQLConf {
     .intConf
     .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT)
 
-  val CODEGEN_MAX_LINES_PER_FUNCTION = buildConf("spark.sql.codegen.maxCodegenLinesPerFunction")
-    .internal()
-    .doc("The maximum lines of a single Java function generated by codegen. " +
-      "When the generated function exceeds this threshold, the multiple statements, " +
-      "whose lines are less than the value, are splited into a function. " +
-      "The default value 100 is the max length of byte code JIT supported.")
-    .intConf
-    .createWithDefault(100)
-
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
     .longConf
@@ -1070,8 +1061,6 @@ class SQLConf extends Serializable with Logging {
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
 
-  def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNCTION)
-
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
 

From a489938b3f128558df31c97a32e196620c9fd475 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Wed, 4 Oct 2017 19:31:52 +0100
Subject: [PATCH 6/9] rebase with master

---
 .../sql/catalyst/expressions/codegen/CodeFormatter.scala  | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 60e600d8dbd8f..7b398f424cead 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -89,6 +89,14 @@ object CodeFormatter {
     }
     new CodeAndComment(code.result().trim(), map)
   }
+
+  /** Returns `input` with Java comments and blank lines removed. */
+  def stripExtraNewLinesAndComments(input: String): String = {
+    // `/* ... */` (may span lines) or `// ...` through its newline, with leading spaces/tabs
+    val commentReg = """([ \t]*?\/\*[\s\S]*?\*\/[ \t]*?)|([ \t]*?\/\/[\s\S]*?\n)""".r
+    val codeWithoutComment = commentReg.replaceAllIn(input, "")
+    codeWithoutComment.replaceAll("""\n\s*\n""", "\n") // collapse blank lines into one newline
+  }
 }
 
 private class CodeFormatter {

From b04c09c3683f104909713344c90e46b4129f5401 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 6 Oct 2017 17:25:44 +0100
Subject: [PATCH 7/9] avoid using SparkEnv.get

---
 .../catalyst/expressions/codegen/CodeGenerator.scala  |  8 +-------
 .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 1d6c7f242e9a2..d400c929ad391 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,13 +772,7 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
-    val defaultMaxLines = 100
-    val maxLines = if (SparkEnv.get != null) {
-      SparkEnv.get.conf.getInt("spark.sql.codegen.expressions.maxCodegenLinesPerFunction",
-        defaultMaxLines)
-    } else {
-      defaultMaxLines
-    }
+    val maxLines = SQLConf.get.maxCodegenLinesPerFunction
     var line = 0
     for (code <- expressions) {
       // We can't know how many bytecode will be generated, so use the line of source code
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 58323740b80cc..ee94cdd0971e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -586,6 +586,15 @@ object SQLConf {
     .intConf
     .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT)
 
+  val CODEGEN_MAX_LINES_PER_FUNC =
+    buildConf("spark.sql.codegen.expressions.maxCodegenLinesPerFunction")
+      .internal()
+      .doc("The maximum line number of a single Java function splited by " +
+        "CodeGenerator.splitExpression() method. This threshold is defined to reduce " +
+        "possibility of exceeding 64KB of Java bytecode size of the splitted method.")
+      .intConf
+      .createWithDefault(100)
+
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
     .longConf
@@ -1061,6 +1070,8 @@ class SQLConf extends Serializable with Logging {
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
 
+  def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNC)
+
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
 

From 4c4780207afcc2d55d19bf8d3e9fc29812f07ae8 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Tue, 10 Oct 2017 18:58:24 +0100
Subject: [PATCH 8/9] use the original threshold against Java code excluding
 comments

---
 .../catalyst/expressions/codegen/CodeGenerator.scala | 12 +++++-------
 .../org/apache/spark/sql/internal/SQLConf.scala      | 11 -----------
 .../sql/execution/benchmark/BenchmarkWideTable.scala | 12 ++++++------
 3 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d400c929ad391..2cb66599076a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,21 +772,19 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
-    val maxLines = SQLConf.get.maxCodegenLinesPerFunction
-    var line = 0
+    var length = 0
     for (code <- expressions) {
-      // We can't know how many bytecode will be generated, so use the line of source code
+      // We can't know how many bytecode will be generated, so use the length of source code
       // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should
       // also not be too small, or it will have many function calls (for wide table), see the
       // results in BenchmarkWideTable.
-      if (line > maxLines) {
+      if (length > 1024) {
         blocks += blockBuilder.toString()
         blockBuilder.clear()
-        line = 0
+        length = 0
       }
       blockBuilder.append(code)
-      val lineOfCode = CodeFormatter.stripExtraNewLinesAndComments(code).count(_ == '\n')
-      line += (if (lineOfCode == 0) 1 else lineOfCode)
+      length += CodeFormatter.stripExtraNewLinesAndComments(code).length
     }
     blocks += blockBuilder.toString()
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index ee94cdd0971e3..58323740b80cc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -586,15 +586,6 @@ object SQLConf {
     .intConf
     .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT)
 
-  val CODEGEN_MAX_LINES_PER_FUNC =
-    buildConf("spark.sql.codegen.expressions.maxCodegenLinesPerFunction")
-      .internal()
-      .doc("The maximum line number of a single Java function splited by " +
-        "CodeGenerator.splitExpression() method. This threshold is defined to reduce " +
-        "possibility of exceeding 64KB of Java bytecode size of the splitted method.")
-      .intConf
-      .createWithDefault(100)
-
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
     .longConf
@@ -1070,8 +1061,6 @@ class SQLConf extends Serializable with Logging {
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
 
-  def maxCodegenLinesPerFunction: Int = getConf(CODEGEN_MAX_LINES_PER_FUNC)
-
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
index 293516cc643ef..9dcaca0ca93ee 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
@@ -42,12 +42,12 @@ class BenchmarkWideTable extends BenchmarkBase {
     /**
      * Here are some numbers with different split threshold:
      *
-     *  Split threshold      Rate(M/s)   Per Row(ns)
-     *  10                   1.4         724.3
-     *  80                   1.5         682.6
-     *  100                  1.7         599.1
-     *  128                  1.5         678.8
-     *  1024                 0.7         1372.1
+     *  Split threshold      methods       Rate(M/s)   Per Row(ns)
+     *  10                   400           0.4         2279
+     *  100                  200           0.6         1554
+     *  1k                   37            0.9         1116
+     *  8k                   5             0.5         2025
+     *  64k                  1             0.0        21649
      */
   }
 }

From 516a72a62cb579f2952c4b776afec0dc1826e590 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Tue, 10 Oct 2017 19:37:27 +0100
Subject: [PATCH 9/9] revert test case

---
 .../codegen/CodeFormatterSuite.scala          | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index 9d0a41661beaa..a0f1a64b0ab08 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -53,6 +53,38 @@ class CodeFormatterSuite extends SparkFunSuite {
     assert(reducedCode.body === "/*project_c4*/")
   }
 
+  test("removing extra new lines and comments") {
+    val code = // block and line comments, each flush-left and indented, plus blank lines
+      """
+        |/*
+        |  * multi
+        |  * line
+        |  * comments
+        |  */
+        |
+        |public function() {
+        |/*comment*/
+        |  /*comment_with_space*/
+        |code_body
+        |//comment
+        |code_body
+        |  //comment_with_space
+        |
+        |code_body
+        |}
+      """.stripMargin
+
+    val reducedCode = CodeFormatter.stripExtraNewLinesAndComments(code)
+    assert(reducedCode === // only the signature, the three code_body lines and "}" remain
+      """
+        |public function() {
+        |code_body
+        |code_body
+        |code_body
+        |}
+      """.stripMargin)
+  }
+
   testCase("basic example") {
     """
       |class A {